view sample_filter.xml @ 1:d2f75a01c428 draft

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 680116d0cf6a6d7246cba655452dea43269aeba4"
author computational-metabolomics
date Tue, 28 Apr 2020 17:37:36 -0400
parents 415fe844ec6c
children
line wrap: on
line source

<tool id="dimspy_sample_filter" name="Sample Filter" version="@TOOL_VERSION@+galaxy@GALAXY_TOOL_VERSION@">
    <!-- Short summary of what the tool does, given as the tool's description. -->
    <description>Remove peaks that fail to appear in x-out-of-n (biological) samples</description>
    <!-- macros.xml is not visible from this file. Presumably it defines the @TOKEN@
         placeholders used in this wrapper as well as the "requirements", "hdf5_pm_to_txt",
         "outputs_peak_intensity_matrix" and "citations" macros expanded here.
         TODO confirm against macros.xml. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah) for "dimspy sample-filter".
        * The rsd-threshold and qc-label options are only emitted when their (hidden)
          parameters are non-empty.
        * Free-text values are wrapped in single quotes so that a value containing
          spaces stays a single shell argument instead of being split into extra ones.
        * The two HDF5-to-text export tokens come from outside this file and are chained
          with the shell AND operator, so they run only if the preceding step succeeded.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        dimspy sample-filter
        --input '$hdf5_file_in'
        --output '$hdf5_file_out'
        --min-fraction $min_fraction
        ${within}
        #if $rsd_threshold
            --rsd-threshold '$rsd_threshold'
        #end if
        #if $qc_label
            --qc-label '$qc_label'
        #end if
        #if $hdf5_to_txt.standard
            &&
            @HDF5_PM_TO_TXT@
        #end if
        #if $hdf5_to_txt.comprehensive
            &&
            @HDF5_PM_TO_TXT_COMPREHENSIVE@
        #end if
    ]]>
    </command>
    <inputs>
        <!-- Peak intensity matrix to be filtered. -->
        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peak Intensity Matrix (HDF5 file)" help="" />
        <!-- Rendered directly into the command line: "within" flag when checked, nothing otherwise. -->
        <param name="within" argument="--within" type="boolean" checked="false" truevalue="--within" falsevalue="" label="Apply sample filter within each sample class."  help="" />
        <param name="min_fraction" argument="--min-fraction" type="float" min="0" max="1.0" value="0.5" label="Minimum fraction" help="Minimum fraction (i.e. percentage) of samples a peak has to be present" />
        <!-- Hidden parameters: they default to empty, in which case the command template
             omits the corresponding option. -->
        <param name="rsd_threshold" type="hidden" value="" label="Relative standard deviation threshold" help="Leave empty to skip this step" argument="--rsd-threshold"/>
        <param name="qc_label" argument="--qc-label" type="hidden" value="" label="Label for the QC sample (RSD filter only)" help="Peaks that have a larger RSD than the QC peaks are removed (Leave empty to skip this step)." >
            <!-- The pattern is anchored at both ends. The regex validator only matches from
                 the start of the value, so an unanchored pattern ending in "*" would match
                 zero characters of any input and never reject anything. -->
            <validator type="regex" message="Value may include alphanumeric characters, underscores, dashes and spaces.">^[A-Za-z0-9_\- ]*$</validator>
        </param>
        <!-- NOTE(review): "delimiter" is not referenced in the command template of this file;
             presumably it is consumed by the text export tokens from macros.xml. Confirm. -->
        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" label="" help=""/>
        <expand macro="hdf5_pm_to_txt" />
    </inputs>
    <outputs>
        <!-- Output datasets are declared by a macro defined outside this file. The command
             template and the tests refer to hdf5_file_out, matrix_file_out and
             matrix_comprehensive_file_out, so the macro presumably declares those three.
             TODO confirm against macros.xml. -->
        <expand macro="outputs_peak_intensity_matrix" />
    </outputs>
    <tests>
        <!-- Test 1: filter across all classes; text export with samples as rows, intensity values. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <!-- Boolean parameters are set with explicit true/false rather than by repeating
                 the raw truevalue/falsevalue strings. -->
            <param name="within" value="false"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf.txt" ftype="tsv" compare="sim_size"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_compr.txt" ftype="tsv"/>
        </test>
        <!-- Test 2: same filter settings as test 1; text export with samples as columns, m/z values. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <param name="within" value="false"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="columns"/>
                <param name="matrix_attr" value="mz"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_mz.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_mz_compr.txt" ftype="tsv"/>
        </test>
        <!-- Test 3: filter applied within each sample class; export settings as in test 1. -->
        <test>
            <param name="hdf5_file_in" value="pm_as_bf.h5" ftype="h5"/>
            <param name="within" value="true"/>
            <param name="min_fraction" value="0.5"/>
            <param name="rsd_threshold" value=""/>
            <param name="qc_label" value=""/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="representation_samples" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as_bf_sf_within.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_bf_sf_within.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_bf_sf_within_compr.txt" ftype="tsv"/>
        </test>
    </tests>
    <help>
-------------
Sample Filter
-------------

..

----------------------------------------------

Description
-----------

Standard DIMS processing workflow: Process Scans -> [Replicate Filter] -> Align Samples -> Blank Filter -> **Sample Filter** -> [Missing values sample filter] -> Pre-processing -> Statistics

|

There are many and varied reasons why a peak may not have been detected in all study samples, including: due to having an intensity (concentration) close to the signal-to-noise limit of the system; due to having been present in only one of the study classes (e.g. a drug administered to the ‘treatment’ class samples); due to ion suppression/enhancement effects in the mass spectrometer source region; etc. The Sample Filter tool allows users to remove peaks from the input peak intensity matrix that were detected in fewer than a user-defined minimum fraction of study samples.

------------------------------------

Parameters
-----------

**Peak Intensity Matrix (HDF5 file)** (REQUIRED) - an HDF5 file containing the peak intensity matrix to undergo sample filtering.


**Apply sample filter within each sample class** (REQUIRED; default **No**) 

    - **No** - check across ALL classes simultaneously whether greater-than the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak.

    - **Yes** - check within EACH class separately whether greater-than the user-defined “Minimum fraction” of samples contained an intensity value for a specific mass spectral peak. 

      **IMPORTANT**: if in ANY class a peak is detected in greater-than the user-defined minimum fraction of samples, then the peak is retained in the output peak matrix. For classes in which this condition is not met, the peak intensity recorded for that peak (if any) will still be presented in the output peak matrix. If no peak intensity was recorded in a sample, then a ‘0’ is inserted into the peak matrix.


**Minimum fraction** (REQUIRED, default 0.5) - a numeric value between 0 and 1 indicating the proportion of study samples in which a peak must have a recorded intensity value in order for it to be retained in the output peak intensity matrix; e.g. 0.5 means that at least 50% of samples (whether assessed across all classes, or within each class individually) must have a recorded intensity value for a specific peak in order for it to be retained in the output peak matrix.

|

@help_options_addtional_output@

-------------------------------


Output file(s)
--------------

**IMPORTANT** - in all outputs except for the (optional) comprehensive output, if fewer than the user-defined “Minimum fraction” of samples (whether assessed within a class, or across all classes) had a recorded intensity value for a given peak, then that peak will be removed from the output peak matrix. Note that when assessing within classes, only one class needs to have recorded intensity values in more than the “Minimum fraction” of samples in order for the peak to be retained in the output peak matrix.

@help_outputs_matrix@

-------------------------------------

@github_developers_contributors@
@license@
    </help>
    <expand macro="citations" />
</tool>