view flashlfq.xml @ 4:908ab13490dc draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 047addefbac7468a92ba4fc04899bd5136c58ea4
author galaxyp
date Wed, 30 Jan 2019 05:39:17 -0500
parents ff1148892ce3
children cb346052c5de
line wrap: on
line source

<tool id="flashlfq" name="FlashLFQ" version="0.1.111">
    <!-- One-line summary of the tool. -->
    <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
    <!-- Package dependency; the pinned version is the same value as the tool's own version attribute. -->
    <requirements>
        <requirement type="package" version="0.1.111">flashlfq</requirement>
    </requirements>
    <!--
        Cheetah command template. Steps:
          1. Symlink the identification file into the working directory under a
             shell-safe name ending in ".psmtsv".
          2. Symlink every spectrum file into spectrum_dir/ with an ".mzML" extension.
          3. When normalization is requested, symlink the experimental design into
             spectrum_dir/ as ExperimentalDesign.tsv.
          4. Run FlashLFQ, write a log with the working-directory path scrubbed, and
             copy the three result tables to the Galaxy outputs.
        Comments inside the CDATA section use the Cheetah "##" form so they are not
        emitted into the generated command line.
    -->
    <command><![CDATA[
        #import re
        ## Dataset display names are user-controlled and are interpolated into
        ## single-quoted shell words below, so apostrophes are replaced to keep the
        ## quoting intact. Whitespace is also replaced in the identification file name.
        #set $idt_path = $re.sub("[\s']", '_', $re.sub('[.][^.]*$', '', $idt.display_name.split('/')[-1])) + ".psmtsv"
        ln -s '${idt}' '${idt_path}' &&
        mkdir spectrum_dir &&
        #for $peak_list in $peak_lists:
            ## Only apostrophes are replaced here; the rest of the name is kept as is
            ## (presumably FlashLFQ matches it against the "File Name" column of the
            ## identification file - confirm before sanitizing further).
            #set $input_name = $re.sub("'", '_', $re.sub('[.][^.]*$', '', $peak_list.display_name.split('/')[-1])) + ".mzML"
            ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
        #end for
        #if $normalize.nor == 'true':
            ## The dataset lives inside the "normalize" conditional, so it must be
            ## referenced through that prefix. It is linked under a fixed name because
            ## FlashLFQ is expected to look for ExperimentalDesign.tsv next to the
            ## spectrum files (see the FlashLFQ documentation).
            ln -s '${normalize.experimental_design}' 'spectrum_dir/ExperimentalDesign.tsv' &&
        #end if
        FlashLFQ
        --idt '$idt_path'
        --rep "./spectrum_dir"
        --ppm $ppm
        --iso $iso
        --nis $nis
        #if $normalize.nor == 'true':
            --nor true
        #end if
        #if $intensity == 'integrate':
            --int true
        #end if
        #if $charge == 'precursor':
            --chg true
        #end if
        ## These booleans expand to their full "--flag true|false" text.
        $rmm $mbr $pro
        #if $mbr:
            ## The MBR window is only meaningful when match-between-runs is enabled.
            --mrt $mrt
        #end if
        --out . > logfile.txt
        ## Strip the job working-directory path from the "Analysis summary for:" line.
        && sed 's/\(Analysis summary for:\).*working./\1 /' logfile.txt > '$log'
        && cp *_FlashLFQ_QuantifiedProteins.tsv '$quantifiedProteins'
        && cp *_FlashLFQ_QuantifiedPeptides.tsv '$quantifiedPeptides'
        && cp *_FlashLFQ_QuantifiedPeaks.tsv '$quantifiedPeaks'
    ]]></command>

    <inputs>
        <!-- Input datasets -->
        <param name="idt"
               type="data"
               format="tabular"
               label="identification file"
               help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
        <param name="peak_lists"
               type="data"
               format="mzml"
               multiple="true"
               label="spectrum files"/>

        <!-- Numeric tolerances and thresholds -->
        <param name="ppm" type="float" value="10" min="1" max="20"
               label="monoisotopic ppm tolerance"/>
        <param name="iso" type="float" value="5" min="1" max="10"
               label="isotopic distribution tolerance in ppm"/>
        <param name="nis" type="integer" value="2" min="1" max="30"
               label="number of isotopes required to be observed"/>

        <!-- Quantification mode selectors -->
        <param name="intensity" type="select" label="intensity">
            <option value="apex" selected="true">use the apex intensity</option>
            <option value="integrate">integrate chromatographic peak intensity</option>
        </param>
        <param name="charge" type="select" label="charge">
            <option value="all" selected="true">use all identification detected charge states</option>
            <option value="precursor">use precursor charge</option>
        </param>

        <!-- The boolean parameters below carry the complete command-line flag text
             as their true/false values; the command template inserts them verbatim. -->
        <param name="rmm"
               type="boolean"
               truevalue="--rmm true"
               falsevalue="--rmm false"
               checked="true"
               label="require observed monoisotopic mass peak"/>

        <!-- Normalization needs an additional experimental design dataset. -->
        <conditional name="normalize">
            <param name="nor" type="select" label="normalize intensity results">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="experimental_design"
                       type="data"
                       format="tabular"
                       label="ExperimentalDesign.tsv"/>
            </when>
        </conditional>

        <!-- Match between runs (MBR) and its retention-time window -->
        <param name="mbr"
               type="boolean"
               truevalue="--mbr true"
               falsevalue="--mbr false"
               checked="false"
               label="match between runs"/>
        <param name="mrt" type="float" value="1.5" min="0" max="30"
               label="maximum MBR window in minutes"/>

        <param name="pro"
               type="boolean"
               truevalue="--pro true"
               falsevalue="--pro false"
               checked="false"
               label="advanced protein quantification"/>
    </inputs>

    <!-- The command template fills each output: the log via sed, the three tables via cp. -->
    <outputs>
        <data name="log"
              format="txt"
              label="${tool.name} on ${on_string}: Log"/>
        <data name="quantifiedPeaks"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv"/>
        <data name="quantifiedPeptides"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv"/>
        <data name="quantifiedProteins"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedProteins.tsv"/>
    </outputs>
    <!-- Functional test: one identification file and one spectrum file, with ppm and
         iso set explicitly. Normalization is not exercised here. -->
    <tests>
        <test>
            <param name="idt" value="aggregatePSMs_5ppmAroundZero.psmtsv" ftype="tabular"/>
            <param name="peak_lists" value="sliced-mzml.mzML" ftype="mzml"/>
            <param name="ppm" value="12"/>
            <param name="iso" value="6"/>
            <!-- Only the peaks table is checked, and only for the presence of this text. -->
            <output name="quantifiedPeaks">
                <assert_contents>
                    <has_text text="EGFQVADGPLYR" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[

**FlashLFQ** is an ultrafast label-free quantification tool for mass-spectrometry proteomics.

**Accepted command-line arguments:**

::

    --idt [string | identification file path (TSV format)]
    --raw [string | MS data file (.raw or .mzML)]
    --rep [string | repository containing MS data files]
    --ppm [double | monoisotopic ppm tolerance] (default = 10)
    --iso [double | isotopic distribution tolerance in ppm] (default = 5)
    --sil [boolean | silent mode; no console output] (default = false)
    --pau [boolean | pause at end of run] (default = true)
    --int [boolean | integrate chromatographic peak intensity instead of using 
          the apex intensity] (default = false)
    --chg [boolean | use only precursor charge state; when set to false, FlashLFQ looks 
          for all charge states detected in the MS/MS identification file for each peptide] (default = false)
    --mbr [boolean | match between runs]
    --rmm [boolean | require observed monoisotopic mass peak]
    --nis [integer | number of isotopes required to be observed]


**Tab-Delimited Identification Text File**

The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).

The following headers are required in the list of MS/MS identifications:

  - **File Name** - File extensions should be tolerated, but file names without an extension have been tested more extensively (e.g., use MyFile and not MyFile.mzML)
  - **Base Sequence** - Should only contain amino acid sequences, or it will likely result in a crash
  - **Full Sequence** - Modified sequence. Can contain any letters, but must be consistent between the same peptidoform to get accurate results
  - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass
  - **Scan Retention Time** - MS/MS identification scan retention time
  - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification
  - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary


**Outputs**:

  - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein.

  - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc., are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.

  - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.

  - **Log.txt** - Log of the FlashLFQ run. 



    ]]></help>
    <!-- Reference cited for this tool, identified by DOI. -->
    <citations>
        <citation type="doi">10.1021/acs.jproteome.7b00608</citation>
    </citations>
</tool>