view flashlfq.xml @ 8:29fe0caf7bca draft

"planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/flashlfq commit 5b42feb3b752fdb22723d6bcedd301b6b1dd8dd3"
author galaxyp
date Wed, 08 Jan 2020 21:54:15 -0500
parents cf494694e8e5
children 878f6725c4f3
line wrap: on
line source

<tool id="flashlfq" name="FlashLFQ" version="1.0.2.1">
    <description>ultrafast label-free quantification for mass-spectrometry proteomics</description>
    <!-- Software dependency for this wrapper; presumably this package supplies the
         FlashLFQ executable invoked in the command section (confirm against the package). -->
    <requirements>
        <requirement version="1.0.2" type="package">flashlfq</requirement>
    </requirements>
    <command><![CDATA[
        ## Stage the inputs under readable file names, then run FlashLFQ with the selected
        ## options. Standard output is captured in logfile.txt; results are written to ./out.
        #import re
        ## Identification file link name: dataset name without directory or extension, with
        ## whitespace and single quotes replaced by underscores (a single quote would end the
        ## shell quoting used below), plus a .psmtsv extension.
        #set $idt_path = $re.sub("[\s']",'_',$re.sub('[.][^.]*$','',$idt.display_name.split('/')[-1])) + ".psmtsv"
        ln -s '${idt}' '${idt_path}' &&
        mkdir spectrum_dir &&
        #for $peak_list in $peak_lists:
            ## The link extension follows the Galaxy datatype: .RAW for datatypes ending in
            ## raw, .mzML otherwise.
            #set $ext = '.mzML'
            #if $peak_list.extension.endswith('raw')
                #set $ext = '.RAW'
            #end if
            ## Whitespace is left as-is in spectrum file names; only single quotes are
            ## replaced so the name cannot break out of the quoted shell argument.
            #set $input_name = $re.sub("'",'_',$re.sub('[.][^.]*$','',$peak_list.display_name.split('/')[-1])) + $ext
            ln -s '${peak_list}' 'spectrum_dir/${input_name}' &&
        #end for
        #if $experiment.use_design == 'true':
          ## The design file is linked next to the spectrum files under a fixed name.
          ln -s '${experiment.experimental_design}' 'spectrum_dir/ExperimentalDesign.tsv' &&
        #end if
        ## A 'y' is piped to FlashLFQ on stdin, presumably to answer an interactive prompt.
        echo 'y' |
        FlashLFQ
        --idt '$idt_path'
        --rep "./spectrum_dir"
        --ppm $ppm
        --iso $iso
        --nis $nis
        $int
        $chg
        $mbr
        ## The MBR window parameter was declared as an input but never passed on before.
        --mrt $mrt
        #if $experiment.use_design == 'true':
            $experiment.nor
            #if $experiment.bayesian.calculate == 'true':
                --bay true
                --ctr '$experiment.bayesian.ctr'
                ## Optional values are only emitted when the user supplied them.
                #if str($experiment.bayesian.fcc):
                    --fcc $experiment.bayesian.fcc
                #end if
                $experiment.bayesian.sha
                $experiment.bayesian.rmc
                --mcm $experiment.bayesian.mcm
                #if str($experiment.bayesian.rns):
                    --rns $experiment.bayesian.rns
                #end if
            #end if
        #end if
        --out out > logfile.txt
    ]]></command>
    <inputs>
        <!-- Identification results and the spectrum files to quantify. -->
        <param argument="--idt" type="data" format="tabular" label="identification file"
             help="MetaMorpheus,Morpheus,PeptideShaker PSM Report,MaxQuant"/>
        <param name="peak_lists" type="data" format="mzml,raw,thermo.raw" multiple="true" label="spectrum files"/>
        <!-- Numeric tolerances and peak requirements. -->
        <param argument="--ppm" type="float" value="10" min="1" max="20" label="monoisotopic ppm tolerance"/>
        <param argument="--iso" type="float" value="5" min="1" max="10" label="isotopic distribution tolerance in ppm"/>
        <param argument="--nis" type="integer" value="2" min="2" max="30" label="number of isotopes required to be observed"/>
        <!-- Boolean switches: the true value is the complete command-line fragment, and the
             false value is empty so nothing is emitted when unchecked. -->
        <param argument="--int" type="boolean" truevalue="--int true" falsevalue="" checked="false"
            label="integrate peak areas (not recommended)"/>
        <param argument="--chg" type="boolean" truevalue="--chg true" falsevalue="" checked="false"
            label="use only precursor charge state"/>
        <param argument="--mbr" type="boolean" truevalue="--mbr true" falsevalue="" checked="false"
            label="match between runs"/>
        <param argument="--mrt" type="float" value="2.5" min=".01" max="60" label="maximum MBR window in minutes"/>
        <!-- Normalization and Bayesian fold-change options are only offered when an
             experimental design file is supplied. -->
        <conditional name="experiment">
            <param name="use_design" type="select" label="Use experimental design for normalization or protein fold-change analysis">
                <option value="false">No</option>
                <option value="true">Yes</option>
            </param>
            <when value="false"/>
            <when value="true">
                <param name="experimental_design" type="data" format="tabular" label="ExperimentalDesign.tsv"/>
                <param argument="--nor" type="boolean" truevalue="--nor true" falsevalue="" checked="true"
                    label="normalize intensity results"/>
                <conditional name="bayesian">
                    <param name="calculate" type="select" label="Perform Bayesian protein fold-change analysis">
                        <option value="false">No</option>
                        <option value="true">Yes</option>
                    </param>
                    <when value="false"/>
                    <when value="true">
                        <!-- Control condition choices are read from column index 1 of the
                             experimental design dataset: the "Condition" heading is dropped,
                             duplicates are removed and the values are sorted. -->
                        <param argument="--ctr" type="select" value="" label="control condition for Bayesian protein fold-change analysis">
                            <options from_dataset="experimental_design">
                                <column name="name" index="1"/>
                                <column name="value" index="1"/>
                                <filter type="static_value" name="heading_ctr" column="1" value="Condition" keep="False"/>
                                <filter type="unique_value" name="unique_ctr" column="1"/>
                                <filter type="sort_by" name="sorted_ctr" column="1"/>
                            </options>
                        </param>
                        <param argument="--fcc" type="float" value="" min="0.01" label="fold-change cutoff" optional="true"
                            help="Leave blank to determine empirically from data."/>
                        <param argument="--sha" type="boolean" truevalue="--sha true" falsevalue="" checked="false"
                            label="use shared peptides for protein quantification"/>
                        <param argument="--rmc" type="boolean" truevalue="--rmc true" falsevalue="" checked="false"
                            label="require MS/MS ID in condition"/>
                        <param argument="--mcm" type="integer" value="500" min="500" label="number of markov-chain monte carlo iterations"/>
                        <param argument="--rns" type="integer" value="" optional="true" label="random seed"/>
                    </when>
                </conditional>
            </when>
        </conditional>
    </inputs>

    <outputs>
        <!-- Captured standard output of the FlashLFQ run. -->
        <data name="log"
              format="txt"
              label="${tool.name} on ${on_string}: Log"
              from_work_dir="logfile.txt"/>
        <!-- Files collected from the "out" directory given to FlashLFQ as its output directory. -->
        <data name="toml"
              format="txt"
              label="${tool.name} on ${on_string}: FlashLfqSettings.toml"
              from_work_dir="out/FlashLfqSettings.toml"/>
        <data name="quantifiedPeaks"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedPeaks.tsv"
              from_work_dir="out/QuantifiedPeaks.tsv"/>
        <data name="quantifiedPeptides"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedPeptides.tsv"
              from_work_dir="out/QuantifiedPeptides.tsv"/>
        <data name="quantifiedProteins"
              format="tabular"
              label="${tool.name} on ${on_string}: QuantifiedProteins.tsv"
              from_work_dir="out/QuantifiedProteins.tsv"/>
        <!-- Only created when the Bayesian branch of the experiment conditional is active,
             i.e. when a control condition parameter is present in the submitted state. -->
        <data name="foldChange"
              format="tabular"
              label="${tool.name} on ${on_string}: BayesianFoldChangeAnalysis.tsv"
              from_work_dir="out/BayesianFoldChangeAnalysis.tsv">
            <filter>'bayesian' in experiment and 'ctr' in experiment['bayesian']</filter>
        </data>
    </outputs>
    <tests>
        <!-- One run without an experimental design: quantify a single mzML file with
             non-default ppm and isotope tolerances, then check that a known peptide
             sequence appears in the quantified peaks table. -->
        <test>
            <param name="idt" ftype="tabular" value="aggregatePSMs_5ppmAroundZero.psmtsv"/>
            <param name="peak_lists" ftype="mzml" value="sliced-mzml.mzML"/>
            <param name="ppm" value="12"/>
            <param name="iso" value="6"/>
            <output name="quantifiedPeaks">
                <assert_contents>
                    <has_text text="EGFQVADGPLYR"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
**FlashLFQ** is an ultrafast label-free quantification tool for mass-spectrometry proteomics.
https://github.com/smith-chem-wisc/FlashLFQ/wiki


**Accepted command-line arguments:**

::

    --idt [string|identification file path]
    --rep [string|directory containing spectral data files]
    --out [string|output directory]
    --ppm [double|ppm tolerance]
    --nor [bool|normalize intensity results]
    --mbr [bool|match between runs]
    --sha [bool|use shared peptides for protein quantification]
    --bay [bool|Bayesian protein fold-change analysis]
    --ctr [string|control condition for Bayesian protein fold-change analysis]
    --fcc [double|fold-change cutoff for Bayesian protein fold-change analysis]

**Advanced settings:**

::

    --sil [bool|silent mode]
    --int [bool|integrate peak areas (not recommended)]
    --iso [double|isotopic distribution tolerance in ppm]
    --mrt [double|maximum MBR window in minutes]
    --chg [bool|use only precursor charge state]
    --nis [int|number of isotopes required to be observed]
    --rmc [bool|require MS/MS ID in condition]
    --mcm [int|number of markov-chain monte carlo iterations for the Bayesian protein fold-change analysis]
    --rns [int|random seed for the Bayesian protein fold-change analysis]


**Tab-Delimited Identification Text File**

The first line of the text file should contain column headers identifying what each column is. Note that MetaMorpheus (.psmtsv), Morpheus, MaxQuant (msms.txt), and TDPortal tab-delimited column headers are supported natively and such files can be read without modification. For search software that lists decoys and PSMs above 1% FDR (e.g., MetaMorpheus), you may want to remove these prior to FlashLFQ analysis. FlashLFQ will probably crash if ambiguous PSMs are passed into it (e.g., a PSM with more than 2 peptides listed in one line).

The following headers are required in the list of MS/MS identifications:

  - **File Name** - File extensions should be tolerated, but no extension is tested more extensively (e.g. use MyFile and not MyFile.mzML)
  - **Base Sequence** - Should only contain amino acid sequences, or it will likely result in a crash
  - **Full Sequence** - Modified sequence. Can contain any letters, but must be consistent between the same peptidoform to get accurate results
  - **Peptide Monoisotopic Mass** - Theoretical monoisotopic mass, including modification mass
  - **Scan Retention Time** - MS/MS identification scan retention time
  - **Precursor Charge** - Charge of the ion selected for MS/MS resulting in the identification
  - **Protein Accession** - Protein accession(s) for the peptide; protein quantification is still preliminary

**ExperimentalDesign File**

The ExperimentalDesign_ file should have 5 columns separated by TAB characters, in this order:

  - SpectrumFileName - Without the file extension
  - Condition - Cannot be blank
  - Sample - an integer, at least 1. Each condition must have continuous sample numbers starting at 1. For example, samples 1, 3, and 4 are not valid because sample 2 is missing. In this case you would label the samples as 1, 2, and 3.
  - Fraction - an integer, at least 1. Each sample must have continuous fraction numbers starting at 1. If your data is not fractionated, just enter 1 for all fractions. It is OK for two samples to have different total numbers of fractions. It is NOT recommended to use a sample if it is missing a fraction with significant peptide intensity (e.g., if sample 2 is missing fraction #5 out of 10 total fractions).
  - Replicate - an integer, at least 1. Each fraction must have continuous replicate numbers starting at 1.

::

    For example, with spectrum files named:

      - 20130510_EXQ1_IgPa_QC_UPS1_01.mzml
      - 20130510_EXQ1_IgPa_QC_UPS1_02.mzml
      - 20130510_EXQ1_IgPa_QC_UPS2_01.mzml
      - 20130510_EXQ1_IgPa_QC_UPS2_02.mzml

    The ExperimentalDesign File:

        FileName	Condition	Biorep	Fraction	Techrep
        20130510_EXQ1_IgPa_QC_UPS1_01	S1	1	1	1
        20130510_EXQ1_IgPa_QC_UPS1_02	S1	2	1	1
        20130510_EXQ1_IgPa_QC_UPS2_01	S2	1	1	1
        20130510_EXQ1_IgPa_QC_UPS2_02	S2	2	1	1


**Outputs**:

  - **QuantifiedProteins.tsv** - Lists protein accession and in the future will include gene and organism if the TSV contains it. The intensity is either a) the sum of the 3 most intense peptides or b) (Advanced protein quant) a weighted-average of the intensities of the peptides assigned to the protein. The weights are determined by how well the peptide co-varies with the other peptides assigned to that protein.

  - **QuantifiedPeaks.tsv** - Each chromatographic peak is shown here, even peaks that were not quantifiable (peak intensity = 0). Details about each peak, such as number of PSMs mapped, start/apex/end retention times, ppm error, etc are contained in this file. A peptide can have multiple peaks over the course of a run (e.g., oxidized peptidoforms elute at different times, etc). Ambiguous peaks are displayed with a | (pipe) delimiter to indicate more than one peptide mapped to that peak.

  - **QuantifiedPeptides.tsv** - Peptide intensities are summed by modified sequence; this makes it convenient to compare modified peptidoform intensities across runs.

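  - **Log.txt** - Log of the FlashLFQ run.

  - **FlashLfqSettings.toml** - The settings file written by FlashLFQ to its output directory.

  - **BayesianFoldChangeAnalysis.tsv** - Only produced when Bayesian protein fold-change analysis is enabled.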


.. _FlashLFQ: https://github.com/smith-chem-wisc/FlashLFQ/wiki
.. _ExperimentalDesign: https://github.com/smith-chem-wisc/FlashLFQ/wiki/Experimental-Design

    ]]></help>
    <!-- Reference shown to users for citing this tool, given as a DOI. -->
    <citations>
        <citation type="doi">10.1021/acs.jproteome.7b00608</citation>
    </citations>
</tool>