view align_samples.xml @ 2:7b6f148bd99f draft default tip

"planemo upload for repository https://github.com/computational-metabolomics/dimspy-galaxy commit 80069808371b58f45da0c8133c27d67ac1a5b448"
author computational-metabolomics
date Wed, 17 Feb 2021 10:57:52 +0000
parents c72868b80c29
children
line wrap: on
line source

<tool id="dimspy_align_samples" name="Align Samples" version="@TOOL_VERSION@+galaxy1">
    <description> - Align peaks across Peaklists</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <!--
        Command template (Cheetah inside CDATA).
        Runs "dimspy align-samples" on the input HDF5 peaklists and writes the
        aligned peak matrix to hdf5_file_out. The filelist option is emitted only
        when a filelist dataset was supplied. The CPU count comes from the
        GALAXY_SLOTS environment variable and falls back to 1 when it is unset.
        When hdf5_to_txt.standard and/or hdf5_to_txt.comprehensive are set, the
        matching macro token is chained after the alignment with a shell AND, so
        it only runs if the previous command succeeded.
        NOTE(review): the two HDF5_PM_TO_TXT tokens are not defined in this file;
        presumably they come from the imported macros.xml. Confirm there.
    -->
    <command detect_errors="exit_code">
    <![CDATA[
        dimspy align-samples
        --input '$hdf5_file_in'
        --output '$hdf5_file_out'
        #if $filelist
            --filelist '$filelist'
        #end if
        --ppm $ppm
        --ncpu \${GALAXY_SLOTS:-1}
        --block-size $adv.block_size
        #if $hdf5_to_txt.standard
            &&
            @HDF5_PM_TO_TXT@
        #end if
        #if $hdf5_to_txt.comprehensive
            &&
            @HDF5_PM_TO_TXT_COMPREHENSIVE@
        #end if
    ]]>
    </command>
    <!--
        Tool inputs.
        hdf5_file_in : required HDF5 dataset holding the peaklists to align.
        filelist     : optional tabular dataset; only passed to dimspy when set.
        ppm          : alignment tolerance in ppm. Bounded below by 0, matching
                       the help text ("equal-to or greater-than 0"), so invalid
                       values are rejected in the form instead of failing the job.
        delimiter    : hidden, fixed to "tab". It is not referenced by the command
                       template in this file. NOTE(review): presumably consumed by
                       the text export macro tokens; confirm in macros.xml.
        hdf5_pm_to_txt macro : text export options (defined outside this file).
        adv.block_size : number of peaks per clustering block. Bounded below by 1
                         because a block must contain at least one peak.
    -->
    <inputs>
        <param name="hdf5_file_in" argument="--input" type="data" format="h5" label="Peaklists (HDF5 file)" help="" />
        <param name="filelist" argument="--filelist" type="data" format="tsv,tabular" optional="true" label="Filelist / Samplelist" help="Only provide a filelist if you like to exclude Peaklists, update the metadata (e.g. classLabel), or if you have not provided a filelist for  Process Scans or Replicate Filter." />
        <param name="ppm" argument="--ppm" type="float" value="2.0" min="0" label="Ppm error tolerance" help="Maximum tolerated m/z deviation across samples in parts per million (ppm)." />
        <param name="delimiter" argument="--delimiter" type="hidden" value="tab" />
        <expand macro="hdf5_pm_to_txt" />
        <section name="adv" title="Advanced options" expanded="False">
            <param name="block_size" argument="--block-size" type="integer" value="5000" min="1" label="Block size" help="The size of each block of peaks to perform clustering on." />
        </section>
    </inputs>
    <!--
        Output datasets are declared by the outputs_peak_intensity_matrix macro,
        which is not defined in this file. The command template and the tests in
        this file refer to hdf5_file_out, matrix_file_out and
        matrix_comprehensive_file_out.
    -->
    <outputs>
        <expand macro="outputs_peak_intensity_matrix" />
    </outputs>
    <!--
        Functional tests. All four use pls_rf.h5 as input with ppm 2.0 and compare
        the HDF5 output against pm_as.h5 by similar size (sim_size).
        NOTE(review): the third test names its select param
        "representation_samples", the other three use "samples_representations".
        Only one of these can match the param declared by the hdf5_pm_to_txt
        macro in macros.xml; confirm the real name and align all four tests.
    -->
    <tests>
        <!-- Test 1: default block size, standard text export, samples as rows, intensity matrix. -->
        <test>
            <param name="hdf5_file_in" value="pls_rf.h5" ftype="h5"/>
            <param name="ppm" value="2.0"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt" >
                <param name="standard" value="True"/>
                <param name="comprehensive" value="False"/>
                <param name="samples_representations" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as.txt" ftype="tsv"/>
        </test>
        <!-- Test 2: same settings as test 1 but with adv.block_size set to 10000; the same expected files are used. -->
        <test>
            <param name="hdf5_file_in" value="pls_rf.h5" ftype="h5"/>
            <param name="ppm" value="2.0"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt" >
                <param name="standard" value="True"/>
                <param name="comprehensive" value="False"/>
                <param name="samples_representations" value="rows"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <section name="adv">
                <param name="block_size" value="10000"/>
            </section>
            <output name="hdf5_file_out" file="pm_as.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as.txt" ftype="tsv"/>
        </test>
        <!-- Test 3: samples as columns; the text matrix is compared against peak_matrix_as_t.txt. See the naming note at the top of this block. -->
        <test>
            <param name="hdf5_file_in" value="pls_rf.h5" ftype="h5"/>
            <param name="ppm" value="2.0"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="False"/>
                <param name="representation_samples" value="columns"/>
                <param name="matrix_attr" value="intensity"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_t.txt" ftype="tsv"/>
        </test>
        <!-- Test 4: standard and comprehensive text export together, using the mz matrix attribute; also checks the comprehensive output. -->
        <test>
            <param name="hdf5_file_in" value="pls_rf.h5" ftype="h5"/>
            <param name="ppm" value="2.0"/>
            <param name="delimiter" value="tab"/>
            <conditional name="hdf5_to_txt">
                <param name="standard" value="True"/>
                <param name="comprehensive" value="True"/>
                <param name="samples_representations" value="rows"/>
                <param name="matrix_attr" value="mz"/>
            </conditional>
            <output name="hdf5_file_out" file="pm_as.h5" ftype="h5" compare="sim_size"/>
            <output name="matrix_file_out" file="peak_matrix_as_mz.txt" ftype="tsv"/>
            <output name="matrix_comprehensive_file_out" file="peak_matrix_as_mz_compr.txt" ftype="tsv"/>
        </test>
    </tests>
    <help>
-------------
Align Samples
-------------

..

-------------------------------

Description
-----------

Standard DIMS processing workflow: Process Scans -> [Replicate Filter] -> **Align Samples** -> Blank Filter -> Sample Filter -> [Missing values sample filter] -> Pre-processing -> Statistics

|

This tool takes the peaklists for all study samples and merges them into a single aligned peak matrix. The peak matrix comprises a table, with samples along one axis and the mass-to-charge ratios of detected mass spectral peaks along the opposite axis. At the intersection of sample and mass-to-charge ratio, the intensity is given for a specific peak in a specific sample (if no intensity was recorded, then ‘0’ is inserted).

-------------------------------

Parameters
----------

**Peaklists (HDF5 file)** (REQUIRED) - an HDF5 file containing all peaklists to undergo alignment.

|

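**Filelist / Samplelist** (OPTIONAL) - only needed to exclude peaklists, to update the metadata (e.g. classLabel), or if no filelist was provided to “Process Scans” or “Replicate Filter”. If provided, it must be a file of type ‘tabular’ with the following required columns (additional metadata columns may also be included, e.g. “collectionTime”, etc.):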

    - **filename** - the name of the .raw or .mzML files from which peaklists were extracted using the “Process Scans” tool
    
    - **batch** - a numeric value indicating the analysis batches samples were analysed in (if a single large analytical run then the default = 1)

    - **classLabel** - a string indicating the experiment classes the samples belong to (e.g. control, QC, blank/placebo, exposed/treatment)

    - **injectionOrder** - a numeric value indicating the order in which samples were analysed.

|

**ppm error tolerance** (REQUIRED; default = 2.0) - a numeric value equal-to or greater-than 0. 

This parameter will influence the alignment of peaks from input peaklists. Peaks from distinct peaklists (corresponding to individual study samples) are aligned if the difference between their mass-to-charge ratios, when divided by the average of their mass-to-charge ratios and multiplied by 1 × 10\ :sup:`6` \, is equal-to or less-than this parameter value (i.e. the difference between the mass-to-charge ratios, measured on the ppm scale, is within the user-defined “ppm error tolerance”).

|

@help_options_addtional_output@

|

------------------------------

Output file(s)
--------------

@help_outputs_matrix@

-------------------------------------------------------------

@github_developers_contributors@
@license@
    </help>
    <expand macro="citations" />
</tool>