view maldi_quant_preprocessing.xml @ 0:e2aa05746a69 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 11:49:06 -0400
parents
children 0892a051eb17
line wrap: on
line source

<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="1.18.0.0">
    <description>
        Preprocessing of mass-spectrometry imaging data
    </description>
    <macros>
        <import>maldi_macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <command detect_errors="exit_code">
    <![CDATA[
        ## Stage the input under a fixed name in the job working directory so that
        ## the R script can always refer to infile.* regardless of the dataset path.
        #if $infile.ext == 'imzml'
            cp '${infile.extra_files_path}/imzml' infile.imzML &&
            cp '${infile.extra_files_path}/ibd' infile.ibd &&
        #elif $infile.ext == 'analyze75'
            cp '${infile.extra_files_path}/hdr' infile.hdr &&
            cp '${infile.extra_files_path}/img' infile.img &&
            cp '${infile.extra_files_path}/t2m' infile.t2m &&
            du infile.hdr &&
            du infile.img &&
            du -s -B1 infile.hdr &&
        #else
            ln -s '$infile' infile.RData &&
        #end if
        Rscript '${maldi_quant_preprocessing}' &&
        ## -p: do not fail when the extra files directory already exists
        mkdir -p '$outfile_imzml.files_path' &&
        ## "| true" deliberately discards the exit status of mv only: the R script
        ## writes no out.* files when all spectra are empty. Do not replace it by
        ## "|| true", which would also hide a failure of the preceding commands.
        mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
        ## the primary dataset only holds a listing of the exported files
        echo "imzML file:" > '$outfile_imzml' &&
        ls -l '$outfile_imzml.files_path' >> '$outfile_imzml'
    ]]>
    </command>
    <configfiles>
        <configfile name="maldi_quant_preprocessing"><![CDATA[

@R_IMPORTS@

## ---- Data import ----
## Either read only the pixels listed in the coordinates file (imzML input)
## or read the complete input file (imzML, Analyze 7.5 or Cardinal RData).
## The result is always stored in maldi_data.
#if $restriction_conditional.restriction == 'restrict':

    print('Reading mask region')
    ## Import imzML file

    ## the first two columns of the tabular file are used as x and y coordinates
    coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2]

    maldi_data = importImzMl('infile.imzML',
                     coordinates = coordinate_matrix)
    pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2])

#else:

    print('Reading entire file')
    #if $infile.ext == 'imzml'
        ## Import imzML file
        maldi_data = import( 'infile.imzML', type="imzML" )
    #elif $infile.ext == 'analyze75'
        ## Import analyze7.5 file
        maldi_data = import( 'infile.hdr' )
    #else
        loadRData <- function(fileName){
            ## loads an RData file and returns the object stored in it
            load(fileName)
            get(ls()[ls() != "fileName"])
        }
        msidata = loadRData('infile.RData')

        ## save coordinates
        cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
        ## save mz values
        cardinal_mzs = Cardinal::mz(msidata)
        ## extract the intensity matrix once instead of once per spectrum
        cardinal_intensities = iData(msidata)
        ## create MALDIquant MassSpectrum object
        maldi_data = list()
        ## seq_len() yields an empty sequence for zero spectra, whereas 1:0 would iterate over c(1, 0)
        for(number_spectra in seq_len(ncol(msidata))){
            maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = cardinal_intensities[,number_spectra])
        }

    #end if

#end if

## Quality control plots during preprocessing
## All plots created from here until dev.off() end up in prepro_qc_plot.pdf.

pdf("prepro_qc_plot.pdf", fonts = "Times", pointsize = 12)
## empty first page that only carries the title
plot(0,type='n',axes=FALSE,ann=FALSE)

## if no filename is given, name of file in Galaxy history is used
    #set $filename = $infile.display_name
title(main=paste("$filename"))

#if str($tabular_annotation.load_annotation) == 'yes_annotation':
    print("use annotation file")
    ## read and extract x,y,annotation information
    input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
    annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
    colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"

    ## merge with coordinate information of MSI data
    ## pixel_index remembers the spectrum order because merge() reorders the rows
    coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
    colnames(coordinates_st)[3] = "pixel_index"
    merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE)
    ## pixels without annotation get the literal label "NA"
    merged_annotation[is.na(merged_annotation)] = "NA"
    merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
    samples = as.factor(merged_annotation\$annotation)

## print annotation overview into PDF output

        ## the more annotation groups a file has the smaller will be the legend
        ## the ranges are contiguous so that exactly 20, 40 or 60 groups
        ## no longer fall through to the smallest font size
        number_combined = length(levels(as.factor(merged_annotation\$annotation)))
        if (number_combined<20){
            legend_size = 10
        }else if (number_combined<40){
            legend_size = 9
        }else if (number_combined<60){
            legend_size = 8
        }else if (number_combined<100){
            legend_size = 7
        }else{
            legend_size = 6
        }

        combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
               geom_tile() +
               coord_fixed()+
               ggtitle("Spatial orientation of annotated data")+
               theme_bw()+
               theme(plot.title = element_text(hjust = 0.5))+
               theme(text=element_text(family="ArialMT", face="bold", size=12))+
               theme(legend.position="bottom",legend.direction="vertical")+
               theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
               guides(fill=guide_legend(ncol=5,byrow=TRUE))

        print(combine_plot)

#end if

#################### Preprocessing methods #####################################

## Every entry of the "methods" repeat is applied to maldi_data in the order
## chosen by the user. After each step the mean spectrum is plotted into the QC PDF.

## QC plot
avgSpectra = averageMassSpectra(maldi_data,method="mean")
plot(avgSpectra, main="Average spectrum for input file")

#for $method in $methods:

    #if str( $method.methods_conditional.method ) == 'Transformation':

        print('transforming')
        ##transformation
        maldi_data = transformIntensity(maldi_data, method="$method.methods_conditional.transform_method")
        ## QC plot
        avgSpectra = averageMassSpectra(maldi_data,method="mean")
        plot(avgSpectra, main="Average spectrum after transformation")


    #elif str( $method.methods_conditional.method ) == 'Smoothing':

        print('smoothing')
        ##smoothing

        #if str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'SavitzkyGolay':
            print('SavitzkyGolay')

            maldi_data = smoothIntensity(maldi_data,
                    method="SavitzkyGolay", polynomialOrder=$method.methods_conditional.methods_for_smoothing.polynomial,
                    halfWindowSize=$method.methods_conditional.halfWindowSize)

        #elif str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'MovingAverage':
            print('MovingAverage')

            maldi_data = smoothIntensity(maldi_data,
                    method="MovingAverage", weighted=$method.methods_conditional.methods_for_smoothing.weighted,
                    halfWindowSize=$method.methods_conditional.halfWindowSize)

        #end if

        ## QC plot
        avgSpectra = averageMassSpectra(maldi_data,method="mean")
        plot(avgSpectra, main="Average spectrum after smoothing")


    #elif str( $method.methods_conditional.method ) == 'Baseline':

        print('baseline removing')
        ## Remove baseline

        maldi_data = removeBaseline(maldi_data,
            method="$method.methods_conditional.baseline_method",
            iterations=$method.methods_conditional.iterations)
        ## QC plot
        avgSpectra = averageMassSpectra(maldi_data,method="mean")
        plot(avgSpectra, main="Average spectrum after baseline removal")


    #elif str( $method.methods_conditional.method ) == 'Calibrate':

        print('calibrate')
        ##calibrate

        ## the m/z range is only used when both limits differ from the default 0
        #if $method.methods_conditional.mass_start != 0 and $method.methods_conditional.mass_end != 0:
        ## calibrate only given m/z range
        maldi_data = calibrateIntensity(maldi_data,
            method="$method.methods_conditional.calibrate_method",
            range=c($method.methods_conditional.mass_start, $method.methods_conditional.mass_end))
        #else:
        maldi_data = calibrateIntensity(maldi_data,
            method="$method.methods_conditional.calibrate_method")
        #end if
        ## QC plot
        avgSpectra = averageMassSpectra(maldi_data,method="mean")
        plot(avgSpectra, main="Average spectrum after normalization")


    #elif str( $method.methods_conditional.method ) == 'Align':

        print('align')
        ##align spectra

        #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':

            maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
                SNR=$method.methods_conditional.snr,
                tolerance=$method.methods_conditional.tolerance,
                warpingMethod="$method.methods_conditional.warping_method")

        #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':

            ## create reference mass_vector from tabular file
            mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = FALSE, stringsAsFactors = FALSE)[,1]
            ## the reference peaks only need m/z values, all intensities are set to 1
            int_vector = rep(1,length(mass_vector))
            mass_list = createMassPeaks(mass_vector, int_vector)

            maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
                SNR=$method.methods_conditional.snr,
                tolerance=$method.methods_conditional.tolerance,
                warpingMethod="$method.methods_conditional.warping_method", 
                reference = mass_list, allowNoMatches =$method.methods_conditional.reference_for_alignment.allow_nomatch, emptyNoMatches = $method.methods_conditional.reference_for_alignment.empty_nomatch)

            #if $method.methods_conditional.reference_for_alignment.remove_empty:

                ## Indices of the empty spectra. Negative indexing with a zero-length
                ## vector selects NO rows at all, so the coordinates and annotations
                ## may only be subset when at least one spectrum is empty.
                empty_spectra = findEmptyMassObjects(maldi_data)
                if (length(empty_spectra) > 0){
                    #if $infile.ext == 'rdata'
                        ## remove coordinates of empty spectra for Cardinal RData input
                        ## drop=FALSE keeps the matrix shape when a single row is left
                        cardinal_coordinates = cardinal_coordinates[-empty_spectra,,drop=FALSE]
                    #end if
                    #if str($tabular_annotation.load_annotation) == 'yes_annotation':
                        ## remove coordinate annotations for empty spectra
                        merged_annotation = merged_annotation[-empty_spectra,]
                    #end if
                }
                maldi_data = removeEmptyMassObjects(maldi_data)
            #end if
        #end if

        ## QC plot

        ## averaging is skipped when no spectrum is left after removing empty ones
        if (length(maldi_data)>0){
            avgSpectra = averageMassSpectra(maldi_data,method="mean")
            plot(avgSpectra, main="Average spectrum after alignment")
        }else{"All spectra are empty"}

    #end if
#end for

## close the QC PDF device
dev.off()

## ---- Export ----
## Nothing is written when no spectrum is left; otherwise the imzML/ibd pair
## and, if an annotation file was used, the updated annotation table are exported.
if (length(maldi_data) == 0){
    "All spectra are empty, outputfiles will be empty,too."
}else{
    ## export imzML file
    #if $infile.ext == 'rdata'
        ## spectra built from Cardinal data get their coordinates passed explicitly
        MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
    #else
        MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed)
    #end if

    ## export annotation tabular file
    #if str($tabular_annotation.load_annotation) == 'yes_annotation':
        write.table(merged_annotation, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
    #end if
}

    ]]>
        </configfile>
    </configfiles>
    <inputs>
        <!-- input dataset: imzML composite dataset or a Cardinal object stored as RData -->
        <param name="infile" type="data" format="imzml,rdata"
               label="MS metadata"
               help="This file is in imzML format or Cardinal MSImageSet saved as RData"/>

        <!-- optional restriction of the import to the pixels listed in a tabular file -->
        <conditional name="restriction_conditional">
            <param name="restriction" type="select"
                   label="Read in only spectra of interest"
                   help="This option only works for imzML files">
                <option value="no_restriction" selected="True">Calculate on entire file</option>
                <option value="restrict">Restrict to coordinates of interest</option>
            </param>
            <when value="no_restriction"/>
            <when value="restrict">
                <param name="coordinates_file" type="data" format="tabular"
                       label="Tabular file with coordinates which should be read"
                       help="x-values in first column, y-values in second column"/>
            </when>
        </conditional>

        <!-- optional pixel annotations that are merged with the spectra coordinates -->
        <conditional name="tabular_annotation">
            <param name="load_annotation" type="select"
                   label="Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed">
                <option value="no_annotation" selected="True">use no annotation</option>
                <option value="yes_annotation">use pixel annotation from a tabular file</option>
            </param>
            <when value="no_annotation"/>
            <when value="yes_annotation">
                <param name="annotation_file" type="data" format="tabular"
                       label="Use annotations from tabular file"
                       help="Tabular file with three columns: x values, y values and pixel annotations"/>
                <param name="column_x" type="data_column" data_ref="annotation_file" label="Column with x values"/>
                <param name="column_y" type="data_column" data_ref="annotation_file" label="Column with y values"/>
                <param name="column_names" type="data_column" data_ref="annotation_file" label="Column with pixel annotations"/>
                <param name="tabular_header" type="boolean" truevalue="TRUE" falsevalue="FALSE" label="Tabular file contains a header line"/>
            </when>
        </conditional>
        <!-- one repeat instance per preprocessing step; the steps are applied in the given order -->
        <repeat name="methods" title="Method" min="1">
            <conditional name="methods_conditional">
                <param name="method" type="select" label="Select the method you want to apply">
                    <option value="Transformation" selected="True">Transformation</option>
                    <option value="Smoothing">Smoothing</option>
                    <option value="Baseline">Baseline removal</option>
                    <option value="Calibrate">Calibrate</option>
                    <option value="Align">Align Spectra (warping/phase correction)</option>
                    <validator type="empty_field" />
                </param>
                <when value="Transformation">
                    <param name="transform_method" type="select" label="Select your transformation method">
                        <option value="sqrt" selected="True">sqrt</option>
                        <option value="log">log</option>
                        <option value="log2">log2</option>
                        <option value="log10">log10</option>
                        <validator type="empty_field" />
                    </param>
                </when>
                <when value="Smoothing">
                    <conditional name="methods_for_smoothing">
                        <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object">
                            <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option>
                            <option value="MovingAverage">MovingAverage</option>
                        </param>
                        <when value="SavitzkyGolay">
                            <!-- integer instead of free text: the value is written unquoted into the R script -->
                            <param name="polynomial" type="integer" value="3" label="PolynomialOrder argument to control the order of the filter"/>
                        </when>
                        <when value="MovingAverage">
                            <param name="weighted" type="boolean" label="Weighted average" help="indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weights normalized to 1" truevalue="TRUE" falsevalue="FALSE"/>
                        </when>
                    </conditional>
                    <param name="halfWindowSize" type="integer" value="10"
                        label="Half window size"
                        help="The resulting window reaches from 
                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
                            (window size is 2*halfWindowSize+1).
                            The best size differs depending on the selected smoothing method."/>
                </when>
                <when value="Baseline">
                    <param name="baseline_method" type="select" label="Baseline removal method">
                        <option value="SNIP" selected="True">SNIP</option>
                        <option value="TopHat">TopHat</option>
                        <option value="ConvexHull">ConvexHull</option>
                        <option value="median">median</option>
                        <validator type="empty_field" />
                    </param>
                    <param name="iterations" type="integer" value="100"
                        label="Number of iterations"
                        help=""/>
                </when>
                <when value="Calibrate">
                    <param name="calibrate_method" type="select" label="Calibration method">
                        <option value="TIC" selected="True">TIC</option>
                        <option value="PQN">PQN</option>
                        <option value="median">median</option>
                        <validator type="empty_field" />
                    </param>
                    <!-- the R script only passes a range to calibrateIntensity when both values differ from 0 -->
                    <param name="mass_start" type="integer" value="0"
                        label="Start of m/z range, has to be inside m/z range" 
                        help="Scaling factor is calculated on the mass range and applied to the whole spectrum"/>
                    <param name="mass_end" type="integer" value="0"
                        label="End of m/z range, has to be inside m/z range" 
                        help="Both the start and the end value need to be different from 0 to be taken into account."/>
                </when>
                <when value="Align">
                    <param name="warping_method" type="select" label="Warping methods">
                        <option value="lowess" selected="True">Lowess</option>
                        <option value="linear">Linear</option>
                        <option value="quadratic">Quadratic</option>
                        <option value="cubic">Cubic</option>
                    </param>

                    <param name="tolerance" type="float" value="0.002"
                        label="Tolerance"
                        help="Double, maximal relative deviation of a peak position (m/z) to be considered as identical" />

                    <param name="halfWindowSize" type="integer" value="20"
                        label="Half window size"
                        help="The resulting window reaches from 
                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
                            (window size is 2*halfWindowSize+1).
                            It is used for the peak detection that precedes the alignment."/>

                    <param name="snr" type="integer" value="2"
                        label="Signal-to-noise-ratio"
                        help=""/>

                    <conditional name="reference_for_alignment">
                        <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration">
                            <option value="no_reference" selected="True">no reference</option>
                            <option value="yes_reference">reference from tabular file</option>
                        </param>
                        <when value="no_reference"/>
                        <when value="yes_reference">
                            <param name="reference_file" type="data" format="tabular"
                                label="Tabular file with m/z of internal calibrants (MassPeaks) which should be used for spectra alignment"
                                help="calibration of m/z values to internal calibrants, at least 2 m/z per spectrum are needed"/>
                            <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
                            <param name="empty_nomatch" type="boolean" label="logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
                            <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE"/>
                        </when>
                    </conditional>
                </when>
            </conditional>
        </repeat>
        <!-- inserted as TRUE/FALSE into the `processed` argument of exportImzMl in the R script -->
        <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/>
    </inputs>
    <outputs>
        <!-- the dataset itself holds a listing of the exported files; the command moves
             out.imzMl and out.ibd into its files_path as "imzml" and "ibd" -->
        <data name="outfile_imzml" format="imzml" label="$infile.display_name processed"/>
        <!-- QC PDF written by the R script into the working directory -->
        <data name="plots" format="pdf" from_work_dir="prepro_qc_plot.pdf" label="$infile.display_name preprocessed QC"/>
        <!-- only created when a pixel annotation file was supplied -->
        <data name="annotation_output" format="tabular" label="$infile.display_name annotations">
            <filter>tabular_annotation["load_annotation"] == 'yes_annotation'</filter>
        </data>
    </outputs>
    <tests>
        <!-- restricted imzML import followed by three preprocessing steps.
             Each step is its own instance of the "methods" repeat.
             NOTE(review): the steps were previously crammed into a single conditional
             with a misspelled "Basline" value; regenerate the expected files if the
             size comparison no longer matches. -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="restriction_conditional">
                <param name="restriction" value="restrict"/>
                <param name="coordinates_file" value="restricted_pixels.tabular"/>
            </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Transformation"/>
                    <param name="transform_method" value="log2"/>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Smoothing"/>
                    <conditional name="methods_for_smoothing">
                        <param name="smooth_method" value="SavitzkyGolay"/>
                    </conditional>
                </conditional>
            </repeat>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Baseline"/>
                    <param name="baseline_method" value="TopHat"/>
                </conditional>
            </repeat>
            <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/>
            <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/>
            <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/>
        </test>
        <!-- Cardinal RData input with PQN intensity calibration; the conditional is
             nested in the "methods" repeat exactly as declared in the inputs section -->
        <test>
            <param name="infile" value="msidata_1.RData" ftype="rdata"/>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Calibrate"/>
                    <param name="calibrate_method" value="PQN"/>
                </conditional>
            </repeat>
            <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/>
            <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/>
            <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/>
        </test>
        <!-- imzML input with pixel annotations, alignment to reference m/z values and
             removal of empty spectra; the conditional is nested in the "methods"
             repeat exactly as declared in the inputs section -->
        <test>
            <param name="infile" value="" ftype="imzml">
                <composite_data value="Example_Continuous.imzML"/>
                <composite_data value="Example_Continuous.ibd"/>
            </param>
            <conditional name="tabular_annotation">
                <param name="load_annotation" value="yes_annotation"/>
                <param name="annotation_file" value="pixel_annotations.tabular"/>
                <param name="column_x" value="1"/>
                <param name="column_y" value="2"/>
                <param name="column_names" value="3"/>
                <param name="tabular_header" value="TRUE"/>
            </conditional>
            <repeat name="methods">
                <conditional name="methods_conditional">
                    <param name="method" value="Align"/>
                    <param name="warping_method" value="linear"/>
                    <param name="halfWindowSize" value="1"/>
                    <conditional name="reference_for_alignment">
                        <param name="align_ref" value="yes_reference"/>
                        <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/>
                        <param name="allow_nomatch" value="TRUE"/>
                        <param name="remove_empty" value="TRUE"/>
                        <param name="empty_nomatch" value="TRUE"/>
                    </conditional>
                </conditional>
            </repeat>
            <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/>
            <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/>
            <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/>
            <output name="annotation_output" file="annotations_output3.tabular"/>
        </test>
    </tests>
    <help><![CDATA[

MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. So far we have only implemented the functionalities for mass spectrometry imaging data.

Input data: 

- MSI data as imzML file (upload via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
- optional tabular file with pixel coordinates to restrict reading of imzML file to coordinates of interest

Options:

- Transformation: transformation of intensities with log, log2, log10 and square root
- Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are SavitzkyGolay and Moving Average
- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). Available methods are SNIP, TopHat, ConvexHull and median.
- Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN)
- Spectra alignment (warping): alignment for (re)calibration of m/z values


Output: 

- imzML file (imzML format can be continuous or processed)
- pdf with average mass spectra after each preprocessing step

.. _MALDIquant: http://strimmerlab.org/software/maldiquant/

        ]]>
    </help>
    <expand macro="citation"/>
</tool>