view morpheus.xml @ 3:8295d959ca28 draft default tip

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/morpheus commit 84a14a4f77dc68f80e6f7d9ac562b6d9b46d58b1
author galaxyp
date Mon, 15 Jul 2024 18:21:24 +0000
parents e0104a9f20f0
children
line wrap: on
line source

<tool id="morpheus" name="Morpheus" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>database search algorithm for high-resolution tandem mass spectra</description>
    <!-- Shared definitions are imported from macros.xml (not shown here).
         NOTE(review): the @TOOL_VERSION@, @VERSION_SUFFIX@ and @PROFILE@ tokens
         used on the tool element, and the three macros expanded below, are
         presumably defined in that file; confirm. -->
    <macros>
	  <import>macros.xml</import>
    </macros>
    <expand macro="edam_ontology"/>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <!-- Job failure detection: exit codes of 1 and above are matched by the
         exit_code rule, and a .NET style exception name such as
         "System.IO.IOException" appearing on stdout or stderr is fatal. -->
    <stdio>
        <exit_code range="1:" />
        <!-- The dot after "System" is escaped so that it matches the literal
             namespace separator rather than any character. -->
        <regex match="System\..*Exception"
           source="both"
           level="fatal"
           description="Error encountered" />
    </stdio>

    <command><![CDATA[
        ## Inputs are symlinked into the job working directory under fixed or
        ## sanitized names, Morpheus is run on them, and the reports found in
        ## the working directory are then copied into output_reports/ with the
        ## directory prefix reported in the log stripped out.
        mkdir -p output_reports &&
        cwd=`pwd` &&
        #import re
        ## Link the search database under a name whose extension matches its
        ## datatype: searchdb.xml for uniprotxml, searchdb.fasta otherwise.
        #if $searchdb.is_of_type('uniprotxml'):
          #set $searchdb_name = 'searchdb.xml'
        #else
          #set $searchdb_name = 'searchdb.fasta'
        #end if
        ln -s '$searchdb' '$searchdb_name'
        ## Need to link each input to a name in cwd, names must be unique
        #set $input_list = []
        #for i,input in enumerate($inputs):
          ## Sanitize the dataset name, strip any trailing mzML suffix and then
          ## append exactly one ".mzML". Doing this with a single substitution
          ## on a "zero or more" pattern anchored at the end also replaces the
          ## empty match at end-of-string on Python 3.7+, which doubles the
          ## extension (name.mzML.mzML).
          #set $input_name = $re.sub('(?i)([.]?mzML)+$','',$re.sub('\W','_',$input.name)) + '.mzML'
          ## A repeated name is made unique by prefixing the input index.
          #if $input_name in $input_list:
            #set $input_name = str($i) + '_' + $input_name
          #end if
          #set $input_list = $input_list + [$input_name]
          && ln -s '$input' '$input_name'
        #end for
        #set $input_names = ','.join($input_list)
        &&

        mono `which morpheus`
        -d='$input_names'
        -db='$searchdb_name'

        ## -noup is only passed for UniProt XML databases, and only when the
        ## advanced section is enabled.
        #if $searchdb.is_of_type('uniprotxml'):
          #if str( $advanced.adv_options_selector) == "set":
            $advanced.noup
          #end if
        #end if
        ## fm vm fdr mvmi precmt precmtv precmtu
        ## Optional numeric params render as '' when empty, optional selects
        ## render as 'None' when nothing is chosen; such options are omitted.
        #if str($fdr) != '':
            -fdr=$fdr
        #end if
        #if  str($mvmi) != '':
            -mvmi=$mvmi
        #end if
        #if  str($precmtv) != '':
            -precmtv=$precmtv
        #end if
        #if  str($precmtu) != 'None':
            -precmtu=$precmtu
        #end if
        #if str( $advanced.adv_options_selector) == "set":
            #if  str($advanced.precmt) != 'None':
                -precmt=$advanced.precmt
            #end if
            #if  str($advanced.minprecz) != '':
                -minprecz=$advanced.minprecz
            #end if
            #if  str($advanced.maxprecz) != '':
                -maxprecz=$advanced.maxprecz
            #end if
            #if  str($advanced.at) != '':
                -at=$advanced.at
            #end if
            #if  str($advanced.rt) != '':
                -rt=$advanced.rt
            #end if
            #if  str($advanced.mp) != '':
                -mp=$advanced.mp
            #end if
            #if  str($advanced.mmc) != '':
                -mmc=$advanced.mmc
            #end if
            #if  str($advanced.prodmt) != 'None':
                -prodmt=$advanced.prodmt
            #end if
            #if  str($advanced.prodmtv) != '':
                -prodmtv=$advanced.prodmtv
            #end if
            #if  str($advanced.prodmtu) != 'None':
                -prodmtu=$advanced.prodmtu
            #end if
            #if  str($advanced.minpmo) != '':
                -minpmo=$advanced.minpmo
            #end if
            #if  str($advanced.maxpmo) != '':
                -maxpmo=$advanced.maxpmo
            #end if
            #if  str($advanced.imb) != 'None':
                -imb=$advanced.imb
            #end if
            #if  str($advanced.ad) != 'None':
                -ad=$advanced.ad
            #end if
            ## Boolean params expand directly to their true/false flag text.
            $advanced.acs $advanced.di $advanced.pmc $advanced.cmu $advanced.mmu
        #end if
        ## Multi-select values arrive comma separated; they are passed to
        ## Morpheus separated by semicolons.
        #if str($fm) != 'None':
            #set $fmods = str($fm).replace(',',';')
            -fm="$fmods"
        #end if
        #if str($vm) != 'None':
            #set $vmods = str($vm).replace(',',';')
            -vm="$vmods"
        #end if
        #if str($p) != 'None':
            -p="$p"
        #end if
        -mt=\${GALAXY_SLOTS:-4}
        #set $out_list = 'log.txt summary.tsv aggregate.PSMs.tsv aggregate.unique_peptides.tsv aggregate.protein_groups.tsv aggregate.mzid *.pep.xml'
        ## With a single input the per-file reports are linked to the
        ## log.txt / aggregate.* names that the copy loop below expects.
        #if len($input_list) == 1:
          && ln -s *.log.txt log.txt
          && ln -s *.mzid aggregate.mzid
          && ln -s *.unique_peptides.tsv aggregate.unique_peptides.tsv
          && ln -s *.protein_groups.tsv aggregate.protein_groups.tsv
          && ln -s *.PSMs.tsv aggregate.PSMs.tsv
        #end if
          ## basepath is whatever precedes the database file name on the
          ## "Proteome Database:" line of the log; it is removed from every
          ## report while copying into output_reports/.
          && ( basepath=`grep 'Proteome Database:' log.txt  | sed 's/Proteome Database: \(.*\)${$searchdb_name}/\1/'`;
             for i in $out_list; do cat \$i | sed "s#\${basepath}\##" > output_reports/\$i; done )
    ]]></command>
    <inputs>
        <!-- Spectra and search database; the command symlinks both into the
             working directory before Morpheus is run. -->
        <param name="inputs" type="data" format="mzml" multiple="true" label='Indexed mzML' />
        <param name="searchdb" type="data" format="fasta,uniprotxml" label="MS Protein Search Database: UniProt Xml or Fasta"/>
        <!-- Multi-selects: the command replaces the ',' between selected
             values with ';' before passing them as -fm / -vm. -->
        <param name="fm" type="select" multiple="true" optional="true" label="Fixed Modifications">
            <expand macro="fixed_modification_options" />
        </param>
        <param name="vm" type="select" multiple="true" optional="true" label="Variable Modifications">
            <expand macro="variable_modification_options" />
        </param>
        <param name="p" type="select" label="Proteases">
            <expand macro="proteases_options"/>
        </param>
        <!-- Basic search settings; each is omitted from the command line when
             left empty or unselected. -->
        <param name="fdr" type="float" value="1" optional="true" min="0.0" max="100.0" label="FDR (Maximum False Discovery Rate percent)" />
        <param name="mvmi" type="integer" value="1024" optional="true" min="0" label="Maximum Variable Modification Isoforms Per Peptide" />
        <param name="precmtv" type="float" value="10." optional="true" label="Precursor Mass Tolerance Value" />
        <param name="precmtu" type="select" optional="true" label="Precursor Mass Tolerance Units">
            <option value="ppm" selected="true">ppm</option>
            <option value="Da">Daltons</option>
        </param>
        <!-- Advanced settings are only read by the command when the selector
             below is "set"; the default is "do_not_set". -->
        <conditional name="advanced">
            <param name="adv_options_selector" type="select" label="Set advanced options?" help="Provides additional controls">
                <option value="set">Set</option>
                <option value="do_not_set" selected="True">Do not set</option>
            </param>
            <when value="set">
                <param name="precmt" type="select" optional="true" label="Precursor Mass Type">
                    <option value="Monoisotopic">Monoisotopic</option>
                    <option value="Average">Average</option>
                </param>
                <!-- Checked emits nothing; unchecked emits -noup=True. The
                     command only uses this value for uniprotxml databases. -->
                <param name="noup" type="boolean" truevalue="" falsevalue="-noup=True" checked="True" label="Use G-PTM with Uniprot Proteome Search Databases" />
                <param name="minprecz" type="integer" value="2" optional="true" label="Minimum Unknown Precursor Charge State" />
                <param name="maxprecz" type="integer" value="4" optional="true" label="Maximum Unknown Precursor Charge State" />
                <param name="at" type="float" value="" optional="true" min="0.0" label="Absolute MS/MS Intensity Threshold" />
                <param name="rt" type="float" value="" optional="true" min="0.0" label="Relative MS/MS Intensity Threshold" />
                <param name="mp" type="integer" value="400" optional="true" min="-1" label="Maximum Number of MS/MS Peaks" help="to disable set to: -1"/>
                <param name="acs" type="boolean" truevalue="-acs=True" falsevalue="-acs=False" checked="true" optional="true" label="Assign Charge States" />
                <param name="di" type="boolean" truevalue="-di=True" falsevalue="-di=False" checked="false" optional="true" label="Deisotope" />
                <param name="ad" type="select" optional="true" label="Create Target-Decoy Database On The Fly"
                       help="Set to No if Search Database is a fasta that already includes decoys.">
                    <option value="True" selected="true">Yes</option>
                    <option value="False">No</option>
                </param>
                <param name="mmc" type="integer" value="2" optional="true" min="0" max="20" label="Maximum Missed Cleavages" />
                <param name="pmc" type="boolean" truevalue="-pmc=True" falsevalue="-pmc=False" checked="false" optional="true" label="Precursor Monoisotopic Peak Correction" />
                <param name="minpmo" type="integer" value="" optional="true" label="Minimum Precursor Monoisotopic Peak Correction" />
                <param name="maxpmo" type="integer" value="" optional="true" label="Maximum Precursor Monoisotopic Peak Correction" />
                <param name="prodmt" type="select" optional="true" label="Product Mass Type">
                    <option value="Monoisotopic">Monoisotopic</option>
                    <option value="Average">Average</option>
                </param>
                <param name="prodmtv" type="float" value="" optional="true" label="Product Mass Tolerance Value" />
                <param name="prodmtu" type="select" optional="true" label="Product Mass Tolerance Units">
                    <option value="Da">Daltons</option>
                    <option value="ppm">ppm</option>
                </param>
                <param name="imb" type="select" optional="true" label="Initiator Methionine Behavior">
                    <option value="Variable">Variable</option>
                    <option value="Retain">Retain</option>
                    <option value="Cleave">Cleave</option>
                </param>
                <param name="cmu" type="boolean" truevalue="-cmu=True" falsevalue="-cmu=False" checked="false" optional="true" label="Consider Modified Forms as Unique Peptides" />
                <param name="mmu" type="boolean" truevalue="-mmu=True" falsevalue="-mmu=False" checked="false" optional="true" label="Minimize Memory Usage" />
            </when>
            <when value="do_not_set"/>
        </conditional>
    </inputs>
    <outputs>
        <!-- Every report is collected from output_reports/, where the command
             writes a copy of each Morpheus report with the path prefix taken
             from the log removed. -->
        <data name="log" format="txt" label="${tool.name} on ${on_string}: log.txt" from_work_dir="output_reports/log.txt" />
        <!-- NOTE(review): summary.tsv is declared as txt although its name
             suggests tab-separated content; confirm whether tabular fits. -->
        <data name="summary" format="txt" label="${tool.name} on ${on_string}: summary.tsv" from_work_dir="output_reports/summary.tsv" />
        <data name="output_psms" format="tabular" label="${tool.name} on ${on_string}: PSMs.tsv" from_work_dir="output_reports/aggregate.PSMs.tsv" />
        <data name="output_unique_peptides" format="tabular" label="${tool.name} on ${on_string}: unique_peptides.tsv" from_work_dir="output_reports/aggregate.unique_peptides.tsv" />
        <data name="output_protein_groups" format="tabular" label="${tool.name} on ${on_string}: protein_groups.tsv" from_work_dir="output_reports/aggregate.protein_groups.tsv" />
        <data name="aggregate_mzid" format="mzid" label="${tool.name} on ${on_string}: aggregate.mzid" from_work_dir="output_reports/aggregate.mzid" />
        <collection name="morpheus_pep_xmls" type="list" label="${tool.name} on ${on_string} pep.xml">
            <!-- Both dots of the extension are escaped so that ".pep.xml" is
                 matched literally. -->
            <discover_datasets pattern="(?P&lt;name&gt;.*\.pep\.xml)" ext="pepxml" visible="false" directory="output_reports" />
        </collection>
    </outputs>
    <tests>
        <test>
            <!-- FASTA database, 2.5 Da precursor tolerance, fm/vm not set by
                 the test. The PSM report must mention both modifications
                 (presumably the default selections from the macros; verify)
                 and must not contain peptide K.TTGSSSSSSSK.K.
                 NOTE(review): precmt is declared under the "advanced"
                 conditional, which defaults to do_not_set, and the command
                 only emits -precmt when that selector is "set". -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311-first100entries.fasta" ftype="fasta"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <output name="output_psms">
                <assert_contents>
                    <not_has_text text="K.TTGSSSSSSSK.K" />
                    <has_text text="carbamidomethylation of C" />
                    <has_text text="(oxidation of M)" />
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- FASTA database with the fixed and variable modification chosen
                 explicitly. The PSM report must contain the oxidised peptide
                 K.VETYM(oxidation of M)ETK.I and must not contain
                 K.TTGSSSSSSSK.K.
                 NOTE(review): precmt is declared under the "advanced"
                 conditional, which this test does not switch to "set". -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311-first100entries.fasta" ftype="fasta"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.VETYM(oxidation of M)ETK.I" />
                    <not_has_text text="K.TTGSSSSSSSK.K" />
                </assert_contents>
            </output>
    	</test>
        <test>
            <!-- Same FASTA search with the protease set to "trypsin". The PSM
                 report must contain K.DGM(oxidation of M)KAYAQNVQQR.E and must
                 not contain K.TTGSSSSSSSK.K.
                 NOTE(review): precmt is declared under the "advanced"
                 conditional, which this test does not switch to "set". -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311-first100entries.fasta" ftype="fasta"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
	    <param name="vm" value="oxidation of M"/>
	    <param name="p" value="trypsin" />
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.DGM(oxidation of M)KAYAQNVQQR.E" />
                    <not_has_text text="K.TTGSSSSSSSK.K" />
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- UniProt XML database with explicit modifications and advanced
                 options left at their default (not set). The PSM report must
                 contain K.RSPSGNISTNSMR.P and must not contain
                 K.KYFLENKIGTDR.R.
                 NOTE(review): precmt is declared under the "advanced"
                 conditional, which this test does not switch to "set". -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311Condensed-first100entries.xml" ftype="uniprotxml"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.RSPSGNISTNSMR.P" />
                    <not_has_text text="K.KYFLENKIGTDR.R" />
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- UniProt XML database with the advanced section enabled and a
                 product mass tolerance value of .01. The PSM report must
                 contain K.KTLKSDGVAGLYR.G and must not contain
                 K.RSPSGNISTNSMR.P.
                 NOTE(review): adv_options_selector, precmt and prodmtv belong
                 to the "advanced" conditional but are given here as flat
                 params; confirm the test framework resolves them as intended. -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311Condensed-first100entries.xml" ftype="uniprotxml"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
            <param name="vm" value="oxidation of M"/>
            <param name="adv_options_selector" value="set"/>
            <param name="prodmtv" value=".01"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.KTLKSDGVAGLYR.G" />
                    <not_has_text text="K.RSPSGNISTNSMR.P" />
                </assert_contents>
            </output>
        </test>
        <test>
            <!-- UniProt XML database with the advanced section enabled, a
                 product mass tolerance value of .01 and the protease set to
                 "trypsin". The PSM report must contain K.KTLKSDGVAGLYR.G and
                 must not contain K.KYFLENKIGTDR.R.
                 NOTE(review): adv_options_selector, precmt and prodmtv belong
                 to the "advanced" conditional but are given here as flat
                 params; confirm the test framework resolves them as intended. -->
            <param name="inputs" value="test_input.mzML" ftype="mzml"/>
            <param name="searchdb" value="uniprot-proteome_UP000002311Condensed-first100entries.xml" ftype="uniprotxml"/>
            <param name="fdr" value="1"/>
            <param name="mvmi" value="1024"/>
            <param name="precmt" value="Monoisotopic"/>
            <param name="precmtu" value="Da"/>
            <param name="precmtv" value="2.5"/>
            <param name="fm" value="carbamidomethylation of C"/>
	    <param name="vm" value="oxidation of M"/>
	    <param name="p" value="trypsin"/>
            <param name="adv_options_selector" value="set"/>
            <param name="prodmtv" value=".01"/>
            <output name="output_psms">
                <assert_contents>
                    <has_text text="K.KTLKSDGVAGLYR.G" />
                    <not_has_text text="K.KYFLENKIGTDR.R" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Morpheus_ is a database search algorithm for high-resolution tandem mass spectra. 

When a UniProt proteome XML file is used as the search database, Morpheus includes all known modifications annotated in that proteome when searching for peptide-spectrum matches.  To fetch UniProt proteome XML files see:  http://www.uniprot.org/help/retrieve_sets

**INPUTS**

  - spectral data file in mzML format 
  - protein search database, either a fasta file or a uniprot proteome xml file

**OUTPUTS**

  - summary.tsv
  - input.log.txt
  - input.PSMs.tsv
  - input.unique_peptides.tsv
  - input.protein_groups.tsv
  - input.pep.xml

.. _Morpheus: http://morpheus-ms.sourceforge.net/

    ]]></help>
    <!-- DOI of the publication to cite when using this tool
         (presumably the Morpheus paper; confirm). -->
    <citations>
        <citation type="doi">10.1021/pr301024c</citation>
    </citations>
</tool>