view searchgui.xml @ 60:a6ff76e057fd draft

"planemo upload for repository commit 62d0c9c13383dc58cc75cd273e0395e53a42b003"
author galaxyp
date Tue, 20 Jul 2021 08:55:11 +0000
parents 943a34df6047
children 3ec27b4cee7c
line wrap: on
line source

<tool id="search_gui"
      name="Search GUI"
        Perform protein identification using various search engines and prepare results for input to Peptide Shaker
        <requirement type="package" version="@SEARCHGUI_VERSION@">searchgui</requirement>
        <requirement type="package" version="3.0">zip</requirement>
    <expand macro="stdio" />
    <command use_shared_home="false">
<![CDATA[
        ## Cheetah template that builds the shell script for one SearchGUI run:
        ##   1. prepare working folders and stage the inputs,
        ##   2. run SearchCLI with the selected search engines,
        ##   3. add the version file to the result archive.

        #from datetime import datetime
        #import json
        #import os
        ## Timestamped labels; %S (seconds) replaces the non-portable %s (epoch seconds).
        #set $exp_str = "Galaxy_Experiment_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
        #set $samp_str = "Sample_%s" % datetime.now().strftime("%Y%m%d%H%M%S")
        #set $bin_dir = "bin"

        mkdir output;
        mkdir output_reports;
        mkdir temp_folder;
        mkdir log_folder;
        ## Always dump the SearchGUI logs to stderr when the job script exits.
        trap ">&2 cat log_folder/*.log" EXIT;

        ## The shell variable cwd is used by every path below, so it must be set first.
        cwd=`pwd`;
        export HOME=\$cwd;

        ## echo the search engines to run (single quotes important because X!Tandem)
        echo '$search_engines_options.engines';
        echo 'DB: ${input_fasta_file.element_identifier} sequences: ${input_fasta_file.metadata.sequences}';

        ##Create a file for the version, which will be added to the searchgui_results if not already present
        echo 'searchgui.version=@SEARCHGUI_VERSION@' >> searchgui.properties;

        ## Link every peak list into the working folder under a name carrying the right extension.
        #for $peak_list_file in $peak_lists_files:
            #set $input_name = ""
            #if $peak_list_file.is_of_type("mgf"):
                #set $input_name = $peak_list_file.element_identifier.split('/')[-1].replace(".mgf", "") + ".mgf"
            #else if $peak_list_file.is_of_type("mzml"):
                #set $input_name = $peak_list_file.element_identifier.split('/')[-1].replace(".mzml", "") + ".mzml"
            #end if
            ln -s -f '${peak_list_file}' '${input_name}';
            ## NOTE(review): encoded dataset id restored from a truncated "#set $encoded_id = $$" - confirm.
            #set $encoded_id = $__app__.security.encode_id($peak_list_file.id)
            echo 'Spectrums:${peak_list_file.element_identifier}(API:${encoded_id})';
        #end for

        ## copy the input .par file to the working folder
        cp '${input_parameters_file}' './SEARCHGUI_IdentificationParameters.par';

        ## copy the input .fasta file to the working folder
        cp '${input_fasta_file}' './input_fasta_file.fasta';

        ## Search CLI ##
        echo 'running search gui' &&
        searchgui -Djava.awt.headless=true eu.isas.searchgui.cmd.SearchCLI
            -use_log_folder 0
            -spectrum_files \$cwd
            -fasta_file "\$cwd/input_fasta_file.fasta"
            -output_folder \$cwd/output
            -id_params ./SEARCHGUI_IdentificationParameters.par
            -temp_folder \$cwd/temp_folder
            -log \$cwd/log_folder
            -threads "\${GALAXY_SLOTS:-12}"

            #if $searchgui_advanced.searchgui_advanced_selector == 'advanced'
                -correct_titles "${searchgui_advanced.correct_titles}"
                ## The boolean expands to "-missing_titles 1" or "-missing_titles 0".
                ${searchgui_advanced.missing_titles}
                -mgf_splitting "${searchgui_advanced.mgf_splitting}"
                -mgf_spectrum_count "${searchgui_advanced.mgf_spectrum_count}"
                -output_gzip "${searchgui_advanced.output_gzip}"
            #end if

            ## Each engine gets exactly one flag: 1 when selected, 0 otherwise.
            #set $engines_list = str($search_engines_options.engines).split(',')
            #if 'X!Tandem' in $engines_list:
                -xtandem 1
            #else
                -xtandem 0
            #end if

            #if 'MyriMatch' in $engines_list:
                -myrimatch 1
            #else
                -myrimatch 0
            #end if

            #if 'MSGF' in $engines_list:
                -msgf 1
            #else
                -msgf 0
            #end if

            #if 'OMSSA' in $engines_list:
                -omssa 1
            #else
                -omssa 0
            #end if

            #if 'Comet' in $engines_list:
                -comet 1
            #else
                -comet 0
            #end if

            #if 'Tide' in $engines_list:
                -tide 1
            #else
                -tide 0
            #end if

            #if 'MS_Amanda' in $engines_list:
                -ms_amanda 1
            #else
                -ms_amanda 0
            #end if

            #if 'Andromeda' in $engines_list:
                -andromeda 1
            #else
                -andromeda 0
            #end if

            #if 'MetaMorpheus' in $engines_list:
                -meta_morpheus 1
            #else
                -meta_morpheus 0
            #end if

            #if 'Novor' in $engines_list:
                -novor 1
            #else
                -novor 0
            #end if

            #if 'DirecTag' in $engines_list:
                -directag 1
            #else
                -directag 0
            #end if

            ## single zip file
            -output_option 0

            ## mgf and database in output
            -output_data 1

        &&

        echo '>log_folder:' &&

        ls -l \$cwd/log_folder &&

        ## NOTE(review): archive name restored from truncated "mv output/" and "zip -u" lines - confirm.
        mv output/searchgui_out.zip searchgui_out.zip &&

        ## Add the version file to the result archive.
        zip -u searchgui_out.zip searchgui.properties
]]>
    </command>
        <!-- Identification parameters; the command section copies this dataset to
             ./SEARCHGUI_IdentificationParameters.par and passes it via -id_params. -->
        <param format="json" name="input_parameters_file" type="data" label="Identification Parameters file"/>

        <!-- Protein database; the command section copies it to ./input_fasta_file.fasta
             and passes it via -fasta_file. -->
        <param format="fasta" name="input_fasta_file" type="data" label="Fasta file" help="For postprocessing with PeptideShaker the sequences must have UniProt style headers and contain decoy sequences (see help)."/>

        <!-- One or more spectrum files; each is symlinked into the working folder
             with an .mgf or .mzml extension before the search runs. -->
        <param name="peak_lists_files" format="mgf,mzml" type="data" multiple="true" label="Input Peak Lists"
            help="Select appropriate MGF/MZML dataset(s) from history.
               Remember to choose only .mzML files when using MetaMorpheus." />

        <!-- Search Engine Selection -->
        <!-- The selected values are matched by name in the command section to switch
             each engine flag on or off. -->
        <section name="search_engines_options" expanded="true" title="Search Engine Options">
            <param name="engines" type="select" display="checkboxes" multiple="True" label="DB-Search Engines">
                <help>Comet and Tide shouldn't both be selected since they use a similar algorithm. OMSSA might not work in isolated environments like containers. MS Amanda may not work either when executed in isolated environments based on Mac OS X.
                    MetaMorpheus only produces results when using the mzML format.</help>
                <option value="X!Tandem" selected="True">X!Tandem</option>
                <option value="MSGF" selected="True">MS-GF+</option>
                <option value="OMSSA">OMSSA</option>
                <option value="Comet">Comet</option>
                <option value="Tide">Tide</option>
                <option value="MyriMatch">MyriMatch</option>
                <option value="MS_Amanda">MS_Amanda</option>
                <option value="MetaMorpheus">MetaMorpheus</option>
                <!-- Windows only
                <option value="Andromeda">Andromeda</option>
                -->
                <!-- New with version 3.0 -->
                <!-- working in tests -->
                <option value="DirecTag">DirecTag</option>
                <option value="Novor">Novor (Select for non-commercial use only)</option>
                <!-- The message names what is actually being selected here: search engines. -->
                <validator type="no_options" message="Please select at least one search engine" />
            </param>
        </section>

        <!-- Optional SearchCLI settings; the command section only adds these flags
             when the selector is set to "advanced". -->
        <conditional name="searchgui_advanced">
            <param name="searchgui_advanced_selector" type="select" label="SearchGUI Options">
                <option value="basic" selected="True">Default</option>
                <option value="advanced">Advanced</option>
            </param>
            <when value="basic" />
            <when value="advanced">
                <param name="correct_titles" type="select" label="How should PeptideShaker deal with duplicate spectra?"
                    help="Unless you suspect some input files to be genuine duplicates then rename spectra is the safest option">
                    <option value="0">no correction</option>
                    <option value="1" selected="True">rename spectra</option>
                    <option value="2">delete spectra</option>
                </param>
                <!-- The true/false values are complete command-line fragments. -->
                <param name="missing_titles" type="boolean" checked="false" truevalue="-missing_titles 1" falsevalue="-missing_titles 0"
                    label="Add missing spectrum titles" help="(-missing_titles)"/>
                <param name="mgf_splitting" type="integer" value="1000" label="The maximum mgf file size in MB before splitting the mgf"
                    help="Choose a smaller value if you are running on a machine with limited memory"/>
                <param name="mgf_spectrum_count" type="integer" value="25000" label="The maximum number of spectra per mgf file when splitting"
                    help="Choose a smaller value if you are running on a machine with limited memory"/>
                <param name="output_gzip" type="boolean" checked="true" truevalue="1" falsevalue="0"
                    label="Gzip result files"/>
            </when>
        </conditional>

        <!-- Result archive collected from the job working directory.
             NOTE(review): from_work_dir and the label variable were empty; searchgui_out.zip and
             tool.name are restored values - confirm the archive name against the command section. -->
        <data name="searchgui_results" format="searchgui_archive" from_work_dir="searchgui_out.zip" label="${tool.name} on ${on_string}" />

        <!-- Test that specifying non-default search engines with default parameters works -->
        <!-- NOTE(review): the expected-output attribute below is empty (file=""); the reference
             archive name has to be restored before the sim_size comparison can run. -->
            <param name="peak_lists_files" value="searchgui_tinyspectra1.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_default.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="X!Tandem,MSGF,MyriMatch,Comet"/>
            <output name="searchgui_results" file="" ftype="searchgui_archive" compare="sim_size" delta="30000" />

        <!-- Test that a search with MetaMorpheus and default parameters works -->
        <!-- Test data has been taken from the MetaMorpheus Galaxy tool -->
        <test>
            <param name="peak_lists_files" value="searchgui_smallCalibratible_Yeast.mzML"/>
            <param name="input_parameters_file" value="Identification_Parameters_default.par"/>
            <param name="input_fasta_file" value="searchgui_smallYeast.fasta" ftype="fasta" />
            <param name="engines" value="MetaMorpheus"/>
            <output name="searchgui_results" ftype="searchgui_archive">
                <assert_contents>
                    <has_size value="798597" delta="10000"/>
                </assert_contents>
            </output>
        </test>

        <!-- Test that a search with MS Amanda and default parameters works -->
        <test>
            <param name="peak_lists_files" value="searchgui_smallspectra.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_default.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="MS_Amanda"/>
            <output name="searchgui_results" ftype="searchgui_archive">
                <assert_contents>
                    <has_size value="635138" delta="5000"/>
                </assert_contents>
            </output>
        </test>

		    <!-- Test that specifying non-default search engines with non-default parameters works -->
        <test>
            <param name="peak_lists_files" value="searchgui_tinyspectra1.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_specific.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="X!Tandem,MSGF,MyriMatch,Comet"/>
            <output name="searchgui_results" ftype="searchgui_archive">
                <assert_contents>
                    <has_size value="159330" delta="20000"/>
                </assert_contents>
            </output>
        </test>

            <!-- NOTE: Identification_Parameters_specific.par is equivalent to the default .par plus these parameters:
            <param name="precursor_ion_tol" value="100"/>
            <param name="min_charge" value="1"/>
            <param name="max_charge" value="3"/>
            <param name="xtandem|xtandem_advanced" value="yes"/>
            <param name="xtandem|xtandem_advanced|xtandem_refine_selector" value="yes"/>
            -->

        <!-- Test that specifying MS Amanda as search engine with non-default parameters works -->
        <test>
            <param name="peak_lists_files" value="searchgui_smallspectra.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_specific.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="MS_Amanda"/>
            <output name="searchgui_results" ftype="searchgui_archive">
                <assert_contents>
                    <has_size value="635158" delta="5000"/>
                </assert_contents>
            </output>
        </test>

        <!-- Test that specifying non-default search engines with default parameters works using modifications -->
        <!-- NOTE(review): the expected-output attribute below is empty (file=""); the reference
             archive name has to be restored before the sim_size comparison can run. -->
            <param name="peak_lists_files" value="searchgui_tinyspectra1.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_default_modifications.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="X!Tandem,MSGF,MyriMatch,Comet"/>
            <output name="searchgui_results" file="" ftype="searchgui_archive" compare="sim_size" delta="30000" />

            <!-- NOTE: Identification_Parameters_default_modifications.par adds:
            - "Carbamidomethylation of C" as fixed modification
            - "Oxidation of M" as variable modification
            -->

        <!-- Test that a search with MS Amanda and default parameters works with modifications -->
        <test>
            <param name="peak_lists_files" value="searchgui_tinyspectra1.mgf"/>
            <param name="input_parameters_file" value="Identification_Parameters_default_modifications.par"/>
            <param name="input_fasta_file" value="fastacli_searchgui_tinydb1_concatenated_target_decoy.fasta" ftype="fasta" />
            <param name="engines" value="MS_Amanda"/>
            <output name="searchgui_results" ftype="searchgui_archive">
                <assert_contents>
                    <has_size value="118136" delta="30000"/>
                </assert_contents>
            </output>
        </test>

**What it does**

Runs multiple search engines on any number of MGF or mzML peak lists using SearchGUI.

Default:     X! Tandem and MS-GF+ are executed.

Optional:     MyriMatch, MS-Amanda, MetaMorpheus, OMSSA (it may not work in isolated environments like containers), Comet, Tide, DirecTag and Novor can be executed.

**Input FASTA**

In order to allow for postprocessing with PeptideShaker the sequences must contain decoy sequences (see _here:
and the FASTA header must either contain no "|" characters (then the whole header will be used as ID) or have the following format:

    >generic[your tag]|[protein accession]|[protein description]

or

    >generic[your tag]|[protein accession]

See _here:

    <expand macro="citations" />