Mercurial > repos > galaxyp > peptideshaker
view peptide_shaker.xml @ 41:3634c90301af draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/peptideshaker commit dea56622eb32b82c87dc87567dc89db3a787033d
author | galaxyp |
---|---|
date | Sat, 28 Oct 2017 04:16:50 -0400 |
parents | 79e37eff006d |
children | f35bb9d0c93e |
line wrap: on
line source
<tool id="peptide_shaker" name="Peptide Shaker" version="1.16.4">
    <!--
        Galaxy wrapper around the PeptideShaker command-line tools.
        The command section runs PeptideShakerCLI on a SearchGUI result archive and then,
        depending on the "outputs" selection, MzidCLI (mzIdentML export) and ReportCLI
        (text reports). Generated report files are renamed to fixed names so that the
        output datasets can pick them up via from_work_dir.
    -->
    <description>
        Perform protein identification using various search engines based on results from SearchGUI
    </description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <requirements>
        <requirement type="package" version="1.16.4">peptide-shaker</requirement>
    </requirements>
    <expand macro="stdio" />
    <command>
<![CDATA[
        #from datetime import datetime
        ## Experiment/sample names are made unique with a timestamp (%S = zero-padded seconds).
        #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
        #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
        #set $temp_stderr = "peptideshaker_stderr"
        #set $bin_dir = "bin"

        mkdir output_reports;
        cwd=`pwd`;
        ln -s '$searchgui_input' searchgui_input.zip &&
        jar xvf searchgui_input.zip SEARCHGUI_IdentificationParameters.par &&

        ######################
        ## PeptideShakerCLI ##
        ######################
        (peptide-shaker eu.isas.peptideshaker.cmd.PeptideShakerCLI
            --exec_dir="\$cwd/${bin_dir}"
            -temp_folder \$cwd/PeptideShakerCLI
            -experiment '$exp_str'
            -sample '$samp_str'
            -replicate 1
            -identification_files \$cwd/searchgui_input.zip
            -id_params \$cwd/SEARCHGUI_IdentificationParameters.par
            -out \$cwd/peptideshaker_output.cpsx
            -zip \$cwd/peptideshaker_output.zip
            -threads "\${GALAXY_SLOTS:-12}"

            ##Optional processing parameters:
            #if $processing_options.processing_options_selector == "yes"
                -protein_fdr "${processing_options.protein_fdr}"
                -peptide_fdr "${processing_options.peptide_fdr}"
                -psm_fdr "${processing_options.psm_fdr}"
                -ptm_score "${processing_options.ptm_score.ptm_score_selector}"
                ## Select values are strings, so compare as a string ("1" is PhosphoRS).
                #if str($processing_options.ptm_score.ptm_score_selector) == "1"
                    ## The param declared in the inputs section is named score_neutral_losses.
                    -score_neutral_losses "${processing_options.ptm_score.score_neutral_losses}"
                    #if str($processing_options.ptm_score.ptm_threshold) != ''
                        -ptm_threshold "${processing_options.ptm_score.ptm_threshold}"
                    #end if
                #end if
                -ptm_alignment "${processing_options.ptm_alignment}"
                -ptm_sequence_matching_type "${processing_options.ptm_sequence_matching_type}"
                -protein_fraction_mw_confidence "${processing_options.protein_fraction_mw_confidence}"
            #end if

            ##Optional filtering parameters:
            #if $filtering_options.filtering_options_selector == "yes":
                -import_peptide_length_min "${filtering_options.min_peptide_length}"
                -import_peptide_length_max "${filtering_options.max_peptide_length}"
                -import_precurosor_mz "${filtering_options.max_precursor_error}"
                -import_precurosor_mz_ppm "${filtering_options.max_precursor_error_type}"
                ##-max_xtandem_e "${filtering_options.max_xtandem_e}"
                ##-max_omssa_e "${filtering_options.max_omssa_e}"
                ##-max_mascot_e "${filtering_options.max_mascot_e}"
                -exclude_unknown_ptms "${filtering_options.exclude_unknown_ptms}"
            #end if
            2>> $temp_stderr)
        &&
        echo "Running Reports";

        ##################################
        ## PeptideShaker Report options ##
        ##################################

        #if 'mzidentML' in str($outputs).split(','):
            echo "Generating mzIdentML";
            (peptide-shaker eu.isas.peptideshaker.cmd.MzidCLI
                --exec_dir="\$cwd/${bin_dir}"
                -in \$cwd/peptideshaker_output.zip
                -output_file \$cwd/output.mzid
                #if $contact_options.contact_options_selector == "yes":
                    -contact_first_name "$contact_options.contact_first_name"
                    -contact_last_name "$contact_options.contact_last_name"
                    -contact_email "$contact_options.contact_email"
                    -contact_address "$contact_options.contact_address"
                    #if str($contact_options.contact_url).strip() != '':
                        -contact_url "$contact_options.contact_url"
                    #end if
                    -organization_name "$contact_options.organization_name"
                    -organization_email "$contact_options.organization_email"
                    -organization_address "$contact_options.organization_address"
                    #if str($contact_options.organization_url).strip() != '':
                        -organization_url "$contact_options.organization_url"
                    #end if
                #else:
                    -contact_first_name "Proteomics"
                    -contact_last_name "Galaxy"
                    -contact_email "galaxyp@umn.edu"
                    -contact_address "galaxyp@umn.edu"
                    -organization_name "University of Minnesota"
                    -organization_email "galaxyp@umn.edu"
                    -organization_address "Minneapolis, MN 55455, Vereinigte Staaten"
                #end if
                2>> $temp_stderr)
            &&
        #end if

        ## Generate Reports if the user has selected one of the 9 additional reports
        ## 'cps', 'mzidentML' and 'zip' are not valid options for PeptideShaker
        ## and will not be passed to the command line
        #if set(["0","1","2","3","4","5","6","7","8"]).intersection( set( str( $outputs ).split(',') ) ):
            (peptide-shaker eu.isas.peptideshaker.cmd.ReportCLI
                --exec_dir="\$cwd/${bin_dir}"
                -temp_folder \$cwd/ReportCLI
                -in \$cwd/peptideshaker_output.zip
                -out_reports \$cwd/output_reports
                #set $cleaned_list = str($outputs).split(',')
                #if 'cps' in $cleaned_list:
                    #silent $cleaned_list.remove('cps')
                #end if
                #if 'mzidentML' in $cleaned_list:
                    #silent $cleaned_list.remove('mzidentML')
                #end if
                #if 'zip' in $cleaned_list:
                    #silent $cleaned_list.remove('zip')
                #end if
                ## Only numbers are left over. These corresponds to different reports.
                -reports #echo ','.join($cleaned_list)#
                2>> $temp_stderr)
            &&
        #end if

        ## # ls -l \$cwd/output_reports/* ;

        ## Rename each generated report to the fixed file name expected by the outputs section.
        ## The Extended PSM report ('8') is moved before the plain PSM report ('3') because
        ## the '*PSM_Report*' pattern would also match the extended report's file name.
        #if '0' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Certificate_of_Analysis*' -exec bash -c 'mv "$0" "certificate.txt"' {} \; ;
        #end if
        #if '1' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Hierarchical*' -exec bash -c 'mv "$0" "hierarchical.txt"' {} \; ;
        #end if
        #if '2' in str($outputs).split(','):
            find \$cwd/output_reports -name '*PSM_Phosphorylation_Report*' -exec bash -c 'mv "$0" "psm_phospho.txt"' {} \; ;
        #end if
        #if '8' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Extended_PSM_Report*' -exec bash -c 'mv "$0" "psmx.txt"' {} \; ;
        #end if
        #if '3' in str($outputs).split(','):
            find \$cwd/output_reports -name '*PSM_Report*' -exec bash -c 'mv "$0" "psm.txt"' {} \; ;
        #end if
        #if '4' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Peptide_Phosphorylation_Report*' -exec bash -c 'mv "$0" "peptides_phospho.txt"' {} \; ;
        #end if
        #if '5' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Peptide_Report*' -exec bash -c 'mv "$0" "peptides.txt"' {} \; ;
        #end if
        #if '6' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Protein_Phosphorylation_Report*' -exec bash -c 'mv "$0" "proteins_phospho.txt"' {} \; ;
        #end if
        #if '7' in str($outputs).split(','):
            find \$cwd/output_reports -name '*Protein_Report*' -exec bash -c 'mv "$0" "proteins.txt"' {} \; ;
        #end if

        ## NOTE(review): the status captured below is that of the last shell command executed
        ## above (for example the final find), not necessarily of the PeptideShaker runs.
        ## Confirm this is the intended exit code to report to Galaxy.
        exit_code_for_galaxy=\$?;
        cat $temp_stderr 2>&1;
        (exit \$exit_code_for_galaxy)
]]>
    </command>
    <inputs>
        <param name="searchgui_input" format="searchgui_archive" type="data" label="Compressed SearchGUI results"
            help="SearchGUI Results from History">
            <options options_filter_attribute="metadata.searchgui_major_version" >
                <filter type="add_value" value="@SEARCHGUI_MAJOR_VERSION@" />
            </options>
        </param>

        <conditional name="processing_options">
            <param name="processing_options_selector" type="select" label="Specify Advanced PeptideShaker Processing Options">
                <option value="no" selected="True">Default Processing Options</option>
                <option value="yes">Advanced Processing Options</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="protein_fdr" label="FDR at the protein level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                <param name="peptide_fdr" label="FDR at the peptide level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                <param name="psm_fdr" label="FDR at the PSM level" help="In percent (default 1% FDR: '1')" value="1" type="float" />
                <param name="protein_fraction_mw_confidence" value="95.0" type="float"
                    label="Minimum confidence required for a protein in the fraction MW plot" help="default 95%: '95.0'" />
                <conditional name="ptm_score">
                    <param name="ptm_score_selector" type="select" label="The PTM probabilistic score to use for PTM localization">
                        <option value="0" selected="True">A-score</option>
                        <option value="1">PhosphoRS</option>
                        <option value="2">None</option>
                    </param>
                    <when value="0" />
                    <when value="1">
                        <param name="score_neutral_losses" label="Include Neutral Losses in A Score" type="boolean" truevalue="1" falsevalue="0" />
                        <param name="ptm_threshold" label="The threshold to use for the PTM scores" optional="true" value="" type="float"
                            help="Automatic mode will be used if not set" />
                    </when>
                    <when value="2" />
                </conditional>
                <param name="ptm_sequence_matching_type" type="select" label="The PTM to peptide sequence matching type">
                    <option value="0">Character Sequence</option>
                    <option value="1" selected="true">Amino Acids</option>
                    <option value="2">Indistinguishable Amino Acids</option>
                </param>
                <param name="ptm_alignment" label="Align peptide ambiguously localized PTMs on confident sites" type="boolean" truevalue="1" falsevalue="0" checked="true"/>
                <!-- protein_fraction_mw_confidence (declared above) is forwarded to PeptideShakerCLI in the command section. -->
            </when>
        </conditional>

        <conditional name="filtering_options">
            <param name="filtering_options_selector" type="select" label="Specify Advanced Filtering Options"
                help="Filter based on peptide lengths, precursor mass error, E value errors from X! Tandem and OMSSA, and include/exclude unknown PTM’s">
                <option value="no" selected="True">Default Filtering Options</option>
                <option value="yes">Advanced Filtering Options</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="min_peptide_length" type="integer" label="Minimum Peptide Length" value="6" />
                <param name="max_peptide_length" type="integer" label="Maximum Peptide Length" value="30" />
                <param name="max_precursor_error" type="float" label="Maximum Precursor Error" value="10"
                    help="Next option specifies units (Da or ppm)" />
                <param name="max_precursor_error_type" label="Maximum Precursor Error Type" type="select">
                    <option value="1">ppm</option>
                    <option value="0">Daltons</option>
                </param>
                <!--param name="max_xtandem_e" label="Maximum X! Tandem e-value" value="100" type="float" help="" /-->
                <!--param name="max_omssa_e" label="Maximum OMSSA e-value" value="100" type="float" help="" /-->
                <!--param name="max_mascot_e" label="Maximum Mascot e-value filter" value="100" type="float" help="" /-->
                <param name="exclude_unknown_ptms" label="Exclude Unknown PTMs" type="boolean" truevalue="1" falsevalue="0" checked="true" />
            </when>
        </conditional>

        <conditional name="contact_options">
            <param name="contact_options_selector" type="select" label="Specify Contact Information for mzIdentML"
                help="Create a Galaxy workflow to save these values">
                <option value="no" selected="True">GalaxyP Project contact (Not suitable for PRIDE submission)</option>
                <option value="yes">Specify Contact Information</option>
            </param>
            <when value="no" />
            <when value="yes">
                <param name="contact_first_name" type="text" value="" label="Contact first name.">
                    <validator type="regex" message="">\S+.*</validator>
                </param>
                <param name="contact_last_name" type="text" value="" label="Contact last name.">
                    <validator type="regex" message="">\S+.*</validator>
                </param>
                <param name="contact_email" type="text" value="" label="Contact e-mail.">
                    <validator type="regex" message="">\S+@\S+</validator>
                </param>
                <param name="contact_address" type="text" value="" label="Contact address.">
                    <validator type="regex" message="">\S+.*</validator>
                </param>
                <param name="contact_url" type="text" value="" optional="true" label="Contact URL.">
                </param>
                <param name="organization_name" type="text" value="" label="Organization name.">
                    <validator type="regex" message="">\S+.*</validator>
                </param>
                <param name="organization_email" type="text" value="" label="Organization e-mail.">
                    <validator type="regex" message="">\S+@\S+</validator>
                </param>
                <param name="organization_address" type="text" value="" label="Organization address.">
                    <validator type="regex" message="">\S+.*</validator>
                </param>
                <param name="organization_url" type="text" value="" optional="true" label="Organization URL.">
                </param>
            </when>
        </conditional>

        <!-- NOTE(review): include_sequences is declared here but is not referenced anywhere in the
             command section, so toggling it currently has no effect. Confirm whether MzidCLI
             should receive it. -->
        <param name="include_sequences" type="boolean" truevalue="1" falsevalue="0" checked="false"
            label="Include the protein sequences in mzIdentML" />

        <param name="outputs" type="select" display="checkboxes" multiple="True" label="Output options">
            <option value="zip">Zip File for import to Desktop App</option>
            <option value="mzidentML" selected="True">mzidentML File</option>
            <option value="3">PSM Report</option>
            <option value="8">Extended PSM Report</option>
            <option value="2">PSM Phosphorylation Report</option>
            <option value="5">Peptide Report</option>
            <option value="4">Peptide Phosphorylation Report</option>
            <option value="7">Protein Report</option>
            <option value="6">Protein Phosphorylation Report</option>
            <option value="0">Certificate of Analysis</option>
            <option value="1">Hierarchical Report</option>
            <option value="cps">CPS file</option>
            <validator type="no_options" message="Please select at least one output file" />
        </param>
    </inputs>
    <outputs>
        <data format="mzid" name="mzidentML" from_work_dir="output.mzid" label="${tool.name} on ${on_string}: mzidentML file">
            <filter>'mzidentML' in outputs</filter>
        </data>
        <!-- The command writes the project file as peptideshaker_output.cpsx, so collect that name. -->
        <data format="peptideshaker_archive" name="output_cps" from_work_dir="peptideshaker_output.cpsx" label="${tool.name} on ${on_string}: CPS file">
            <filter>'cps' in outputs</filter>
        </data>
        <data format="zip" name="output_zip" from_work_dir="peptideshaker_output.zip" label="${tool.name} on ${on_string}: Archive">
            <filter>'zip' in outputs</filter>
        </data>
        <data format="txt" name="output_certificate" from_work_dir="certificate.txt" label="${tool.name} on ${on_string}: Parameters">
            <filter>'0' in outputs</filter>
        </data>
        <data format="tabular" name="output_hierarchical" from_work_dir="hierarchical.txt" label="${tool.name} on ${on_string}: Hierarchical Report">
            <filter>'1' in outputs</filter>
        </data>
        <data format="tabular" name="output_psm_phosphorylation" from_work_dir="psm_phospho.txt" label="${tool.name} on ${on_string}: PSM Phosphorylation Report">
            <filter>'2' in outputs</filter>
        </data>
        <data format="tabular" name="output_psm" from_work_dir="psm.txt" label="${tool.name} on ${on_string}: PSM Report">
            <filter>'3' in outputs</filter>
        </data>
        <data format="tabular" name="output_extended_psm" from_work_dir="psmx.txt" label="${tool.name} on ${on_string}: Extended PSM Report">
            <filter>'8' in outputs</filter>
        </data>
        <data format="tabular" name="output_peptides_phosphorylation" from_work_dir="peptides_phospho.txt" label="${tool.name} on ${on_string}: Peptide Phosphorylation Report">
            <filter>'4' in outputs</filter>
        </data>
        <data format="tabular" name="output_peptides" from_work_dir="peptides.txt" label="${tool.name} on ${on_string}: Peptide Report">
            <filter>'5' in outputs</filter>
        </data>
        <data format="tabular" name="output_proteins_phosphorylation" from_work_dir="proteins_phospho.txt" label="${tool.name} on ${on_string}: Protein Phosphorylation Report">
            <filter>'6' in outputs</filter>
        </data>
        <data format="tabular" name="output_proteins" from_work_dir="proteins.txt" label="${tool.name} on ${on_string}: Protein Report">
            <filter>'7' in outputs</filter>
        </data>
    </outputs>
    <tests>
        <test>
            <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
            <param name="processing_options_selector" value="no"/>
            <param name="filtering_options_selector" value="no"/>
            <param name="outputs" value="zip,3"/>
            <output name="output_zip" file="peptide_shaker_result1.zip" ftype="zip" compare="sim_size" delta="3000" />
            <output name="output_psm">
                <assert_contents>
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
        </test>
        <test>
            <param name="searchgui_input" value="tiny_searchgui_result1.zip" ftype="searchgui_archive"/>
            <param name="processing_options_selector" value="no"/>
            <param name="filtering_options_selector" value="yes"/>
            <param name="min_peptide_length" value="1"/>
            <param name="outputs" value="0,1,2,3,4,5,6,7"/>
            <output name="output_certificate">
                <assert_contents>
                    <has_text text="Tolerance: 100" />
                    <has_text text="Carbamidomethylation of C" />
                </assert_contents>
            </output>
            <output name="output_hierarchical">
                <assert_contents>
                    <has_text_matching expression="1.1\tcds.comp" />
                </assert_contents>
            </output>
            <output name="output_psm">
                <assert_contents>
                    <not_has_text text="Phosphosite" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
            <output name="output_psm_phosphorylation">
                <assert_contents>
                    <has_text text="Phosphosite" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
            <output name="output_peptides">
                <assert_contents>
                    <has_text text="Modification" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
            <output name="output_peptides_phosphorylation">
                <assert_contents>
                    <has_text text="Phosphosite" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
            <output name="output_proteins">
                <assert_contents>
                    <has_text text="Modification" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
            <output name="output_proteins_phosphorylation">
                <assert_contents>
                    <has_text text="Phosphosite" />
                    <has_text text="cds.comp41779_c0_seq1" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
**What it does**

Runs multiple search engines (X! Tandem, OMSSA and MS-GF+) on any number of MGF peak lists using the SearchGUI application and combines the results.

http://compomics.github.io/projects/peptide-shaker.html

http://compomics.github.io/projects/searchgui.html

----

Reports
=======

PSM Report
----------

* Protein(s): Protein(s) to which the peptide can be attached
* Sequence: Sequence of the peptide
* Variable Modifications: The variable modifications
* D-score: D-score for variable PTM localization
* probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
* Localization Confidence: The confidence in variable PTM localization.
* Fixed Modifications: The fixed modifications.
* Spectrum File: The spectrum file.
* Spectrum Title: The title of the spectrum.
* Spectrum Scan Number: The spectrum scan number.
* RT: Retention time
* m/z: Measured m/z
* Measured Charge: The charge as given in the spectrum file.
* Identification Charge: The charge as inferred by the search engine.
* Theoretical Mass: The theoretical mass of the peptide.
* Isotope Number: The isotope number targeted by the instrument.
* Precursor m/z Error: The precursor m/z matching error.
* Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
* Confidence: Confidence in percent associated to the retained PSM.
* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

Protein Report
--------------

* Main Accession: Main accession of the protein group.
* Description: Description of the protein designed by the main accession.
* Gene Name: The gene names of the Ensembl gene ID associated to the main accession.
* Chromosome: The chromosome of the Ensembl gene ID associated to the main accession.
* PI: Protein Inference status of the protein group.
* Secondary Accessions: Other accessions in the protein group (alphabetical order).
* Protein Group: The complete protein group (alphabetical order).
* #Peptides: Total number of peptides.
* #Validated Peptides: Number of validated peptides.
* #Unique: Total number of peptides unique to this protein group.
* #PSMs: Number of PSMs
* #Validated PSMs: Number of validated PSMs
* Coverage (%): Sequence coverage in percent of the protein designed by the main accession.
* Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
* MW (kDa): Molecular Weight.
* Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF)
* Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI)
* Confident Modification Sites: Number of Confident Modification Sites List of the sites where a variable modification was confidently localized.
* Other Modification Sites: Number of other Modification Sites List of the non-confident sites where a variable modification was localized.
* Score: Score of the protein group.
* Confidence: Confidence in percent associated to the protein group.
* Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

Peptide Report
--------------

* Protein(s): Protein(s) to which this peptide can be attached.
* AAs Before: The amino-acids before the sequence.
* Sequence: Sequence of the peptide.
* AAs After: The amino-acids after the sequence.
* Modified Sequence: The peptide sequence annotated with variable modifications.
* Variable Modifications: The variable modifications.
* Localization Confidence: The confidence in PTMs localization.
* Fixed Modifications: The fixed modifications.
* #Validated PSMs: Number of validated PSMs.
* #PSMs: Number of PSMs.
* Score: Score of the peptide.
* Confidence: Confidence in percent associated to the peptide.
* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

Hierarchical Report
-------------------

* Main Accession: Main accession of the protein group.
* Description: Description of the protein designed by the main accession.
* PI: Protein Inference status of the protein group.
* Secondary Accessions: Other accessions in the protein group (alphabetical order).
* Protein Group: The complete protein group (alphabetical order).
* #Peptides: Total number of peptides.
* #Validated Peptides: Number of validated peptides.
* #Unique: Total number of peptides unique to this protein group.
* #PSMs: Number of PSMs
* #Validated PSMs: Number of validated PSMs
* Coverage (%): Sequence coverage in percent of the protein designed by the main accession.
* Possible Coverage (%): Possible sequence coverage in percent of the protein designed by the main accession according to the search settings.
* MW (kDa): Molecular Weight.
* Spectrum Counting NSAF: Normalized Spectrum Abundance Factor (NSAF)
* Spectrum Counting emPAI: exponentially modified Protein Abundance Index (emPAI)
* Confident Modification Sites: # Confident Modification Sites List of the sites where a variable modification was confidently localized.
* Other Modification Sites: # Other Modification Sites List of the non-confident sites where a variable modification was localized.
* Score: Score of the protein group.
* Confidence: Confidence in percent associated to the protein group.
* Decoy: Indicates whether the protein group is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

* Protein(s): Protein(s) to which this peptide can be attached.
* AAs Before: The amino-acids before the sequence.
* Sequence: Sequence of the peptide.
* AAs After: The amino-acids after the sequence.
* Variable Modifications: The variable modifications.
* Localization Confidence: The confidence in PTMs localization.
* Fixed Modifications: The fixed modifications.
* #Validated PSMs: Number of validated PSMs.
* #PSMs: Number of PSMs.
* Score: Score of the peptide.
* Confidence: Confidence in percent associated to the peptide.
* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

* Protein(s): Protein(s) to which the peptide can be attached.
* Sequence: Sequence of the peptide.
* Modified Sequence: The peptide sequence annotated with variable modifications.
* Variable Modifications: The variable modifications.
* D-score: D-score for variable PTM localization.
* probabilistic PTM score: The probabilistic score (e.g. A-score or PhosphoRS) used for variable PTM localization.
* Localization Confidence: The confidence in variable PTM localization.
* Fixed Modifications: The fixed modifications.
* Spectrum File: The spectrum file.
* Spectrum Title: The title of the spectrum.
* Spectrum Scan Number: The spectrum scan number.
* RT: Retention time
* m/z: Measured m/z
* Measured Charge: The charge as given in the spectrum file.
* Identification Charge: The charge as inferred by the search engine.
* Theoretical Mass: The theoretical mass of the peptide.
* Isotope Number: The isotope number targeted by the instrument.
* Precursor m/z Error: The precursor m/z matching error.
* Score: Score of the retained peptide as a combination of the algorithm scores (used to rank PSMs).
* Confidence: Confidence in percent associated to the retained PSM.
* Decoy: Indicates whether the peptide is a decoy (1: yes, 0: no).
* Validation: Indicates the validation level of the protein group.

------

**Citation**

To cite the underlying tools (PeptideShaker and SearchGUI) please refer to the list of papers at http://compomics.github.io

If you use this tool in Galaxy, please cite Chilton J, Ira Cooke, Bjoern Gruening et al.
    </help>
    <expand macro="citations" />
</tool>