diff maldi_quant_preprocessing.xml @ 0:e2aa05746a69 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit 5feaf3d0e0da8cef1241fecc1f4d6f81324594e6
author galaxyp
date Wed, 22 Aug 2018 11:49:06 -0400
parents
children 0892a051eb17
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/maldi_quant_preprocessing.xml	Wed Aug 22 11:49:06 2018 -0400
@@ -0,0 +1,509 @@
+<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="1.18.0.0">
+    <description>
+        Preprocessing of mass-spectrometry imaging data
+    </description>
+    <macros>
+        <import>maldi_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code">
+    <![CDATA[
+        ## Stage the input under a fixed name in the working directory so the
+        ## R script can refer to it independently of the Galaxy dataset path.
+        #if $infile.ext == 'imzml'
+            cp '${infile.extra_files_path}/imzml' infile.imzML &&
+            cp '${infile.extra_files_path}/ibd' infile.ibd &&
+        #elif $infile.ext == 'analyze75'
+            cp '${infile.extra_files_path}/hdr' infile.hdr &&
+            cp '${infile.extra_files_path}/img' infile.img &&
+            cp '${infile.extra_files_path}/t2m' infile.t2m &&
+            ## print the sizes of the staged files to stdout
+            du infile.hdr &&
+            du infile.img &&
+            du -s -B1 infile.hdr &&
+        #else
+            ## quoted so that dataset paths containing spaces or shell metacharacters stay one argument
+            ln -s '$infile' infile.RData &&
+        #end if
+        Rscript "${maldi_quant_preprocessing}" &&
+        ## -p: do not fail when the extra-files directory already exists
+        mkdir -p '$outfile_imzml.files_path' &&
+        ## The R script writes no out.imzMl/out.ibd when all spectra are empty.
+        ## Piping into `true` keeps a failing mv from aborting the chain, while a
+        ## failing Rscript still stops it (a pipeline binds tighter than &&).
+        mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true &&
+        mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true &&
+        ## The primary dataset file only holds a listing of the extra files.
+        echo "imzML file:" > '$outfile_imzml' &&
+        ls -l "$outfile_imzml.files_path" >> '$outfile_imzml'
+    ]]>
+    </command>
+    <configfiles>
+        <configfile name="maldi_quant_preprocessing"><![CDATA[
+
+@R_IMPORTS@
+
+#if $restriction_conditional.restriction == 'restrict':
+
+    ## Restricted import: only the pixels listed in the coordinates file
+    ## (x in column 1, y in column 2) are read. This branch always reads
+    ## 'infile.imzML', so it only works for imzML input.
+    print('Reading mask region')
+    ## Import imzML file
+
+    coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE))[,1:2]
+
+    maldi_data = importImzMl('infile.imzML',
+                     coordinates = coordinate_matrix)
+
+#else:
+
+    print('Reading entire file')
+    #if $infile.ext == 'imzml'
+        ## Import imzML file
+        maldi_data = import( 'infile.imzML', type="imzML" )
+    #elif $infile.ext == 'analyze75'
+        ## Import analyze7.5 file
+        maldi_data = import( 'infile.hdr' )
+    #else
+        ## Load an RData file and return the object stored in it
+        loadRData <- function(fileName){
+        load(fileName)
+        get(ls()[ls() != "fileName"])
+        }
+        msidata = loadRData('infile.RData')
+
+        ## save coordinates (needed later for the imzML export)
+        cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2])
+        ## save mz values
+        cardinal_mzs = Cardinal::mz(msidata)
+        ## intensity matrix, fetched once instead of once per spectrum;
+        ## namespace-qualified like coord() and mz() above
+        cardinal_intensities = Cardinal::iData(msidata)
+        ## create one MALDIquant MassSpectrum object per column of msidata;
+        ## seq_len() yields an empty loop for zero columns, where 1:0 would iterate twice
+        maldi_data = list()
+        for(number_spectra in seq_len(ncol(msidata))){
+        maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = cardinal_intensities[,number_spectra])
+        }
+
+    #end if
+
+#end if
+
+## Quality control plots during preprocessing
+## All plots until dev.off() go into one PDF, which Galaxy collects as the QC output.
+
+pdf("prepro_qc_plot.pdf", fonts = "Times", pointsize = 12)
+## empty plot that serves as title page
+plot(0,type='n',axes=FALSE,ann=FALSE)
+
+## the name of the input file in the Galaxy history is used as title
+    #set $filename = $infile.display_name
+title(main=paste("$filename"))
+
+#if str($tabular_annotation.load_annotation) == 'yes_annotation':
+    print("use annotation file")
+    ## read and extract x,y,annotation information
+    input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE)
+    annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)]
+    colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation"
+
+    ## merge with coordinate information of MSI data
+    #if $infile.ext == 'rdata'
+        ## Spectra built from a Cardinal object are created without metaData,
+        ## so the coordinates saved from the Cardinal object are used instead.
+        ## NOTE(review): assumes coordinates() cannot return pixel positions for such spectra - confirm.
+        coordinates_st = cbind(cardinal_coordinates, c(1:length(maldi_data)))
+        colnames(coordinates_st) = c("x", "y", "pixel_index")
+    #else
+        coordinates_st = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data)))
+        colnames(coordinates_st)[3] = "pixel_index"
+    #end if
+    ## all.x=TRUE keeps every pixel; pixels without annotation get the label "NA"
+    merged_annotation = merge(coordinates_st, annotation_input, by=c("x", "y"), all.x=TRUE)
+    merged_annotation[is.na(merged_annotation)] = "NA"
+    ## merge() reorders rows; restore the order of the spectra in maldi_data
+    merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),]
+    samples = as.factor(merged_annotation\$annotation)
+
+## print annotation overview into PDF output
+
+        ## the more annotation groups a file has the smaller will be the legend
+        ## Each branch only needs an upper bound, so the boundary values
+        ## 20, 40 and 60 no longer fall through to the smallest size.
+        number_combined = nlevels(samples)
+        if (number_combined<20){
+            legend_size = 10
+        }else if (number_combined<40){
+            legend_size = 9
+        }else if (number_combined<60){
+            legend_size = 8
+        }else if (number_combined<100){
+            legend_size = 7
+        }else{
+            legend_size = 6
+        }
+
+        combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+
+               geom_tile() +
+               coord_fixed()+
+               ggtitle("Spatial orientation of annotated data")+
+               theme_bw()+
+               theme(plot.title = element_text(hjust = 0.5))+
+               theme(text=element_text(family="ArialMT", face="bold", size=12))+
+               theme(legend.position="bottom",legend.direction="vertical")+
+               theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+
+               guides(fill=guide_legend(ncol=5,byrow=TRUE))
+
+        print(combine_plot)
+
+#end if
+
+#################### Preprocessing methods #####################################
+
+## The selected methods are applied in the order given by the user. After each
+## step the average spectrum is plotted into the QC PDF.
+
+## QC plot
+avgSpectra = averageMassSpectra(maldi_data,method="mean")
+plot(avgSpectra, main="Average spectrum for input file")
+
+#for $method in $methods:
+
+    #if str( $method.methods_conditional.method ) == 'Transformation':
+
+        print('transforming')
+        ##transformation
+        maldi_data = transformIntensity(maldi_data, method="$method.methods_conditional.transform_method")
+        ## QC plot
+        avgSpectra = averageMassSpectra(maldi_data,method="mean")
+        plot(avgSpectra, main="Average spectrum after transformation")
+
+
+    #elif str( $method.methods_conditional.method ) == 'Smoothing':
+
+        print('smoothing')
+        ##smoothing
+
+        #if str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'SavitzkyGolay':
+            print('SavitzkyGolay')
+
+            maldi_data = smoothIntensity(maldi_data,
+                    method="SavitzkyGolay", polynomialOrder=$method.methods_conditional.methods_for_smoothing.polynomial,
+                    halfWindowSize=$method.methods_conditional.halfWindowSize)
+
+        #elif str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'MovingAverage':
+            print('MovingAverage')
+
+            maldi_data = smoothIntensity(maldi_data,
+                    method="MovingAverage", weighted=$method.methods_conditional.methods_for_smoothing.weighted,
+                    halfWindowSize=$method.methods_conditional.halfWindowSize)
+
+        #end if
+
+        ## QC plot
+        avgSpectra = averageMassSpectra(maldi_data,method="mean")
+        plot(avgSpectra, main="Average spectrum after smoothing")
+
+
+    #elif str( $method.methods_conditional.method ) == 'Baseline':
+
+        print('baseline removing')
+        ## Remove baseline
+
+        maldi_data = removeBaseline(maldi_data,
+            method="$method.methods_conditional.baseline_method",
+            iterations=$method.methods_conditional.iterations)
+        ## QC plot
+        avgSpectra = averageMassSpectra(maldi_data,method="mean")
+        plot(avgSpectra, main="Average spectrum after baseline removal")
+
+
+    #elif str( $method.methods_conditional.method ) == 'Calibrate':
+
+        print('calibrate')
+        ##calibrate
+
+        ## a start or end value of 0 means "no range given"
+        #if $method.methods_conditional.mass_start != 0 and $method.methods_conditional.mass_end != 0:
+        ## calibrate only given m/z range
+        maldi_data = calibrateIntensity(maldi_data,
+            method="$method.methods_conditional.calibrate_method",
+            range=c($method.methods_conditional.mass_start, $method.methods_conditional.mass_end))
+        #else:
+        maldi_data = calibrateIntensity(maldi_data,
+            method="$method.methods_conditional.calibrate_method")
+        #end if
+        ## QC plot
+        avgSpectra = averageMassSpectra(maldi_data,method="mean")
+        plot(avgSpectra, main="Average spectrum after normalization")
+
+
+    #elif str( $method.methods_conditional.method ) == 'Align':
+
+        print('align')
+        ##align spectra
+
+        #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference':
+
+            maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
+                SNR=$method.methods_conditional.snr,
+                tolerance=$method.methods_conditional.tolerance,
+                warpingMethod="$method.methods_conditional.warping_method")
+
+        #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference':
+
+            ## create reference mass_vector from tabular file
+            ## (m/z values in the first column; every reference peak gets intensity 1)
+            mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = FALSE, stringsAsFactors = FALSE)[,1]
+            int_vector = rep(1,length(mass_vector))
+            mass_list = createMassPeaks(mass_vector, int_vector)
+
+            maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize,
+                SNR=$method.methods_conditional.snr,
+                tolerance=$method.methods_conditional.tolerance,
+                warpingMethod="$method.methods_conditional.warping_method",
+                reference = mass_list, allowNoMatches =$method.methods_conditional.reference_for_alignment.allow_nomatch, emptyNoMatches = $method.methods_conditional.reference_for_alignment.empty_nomatch)
+
+            #if $method.methods_conditional.reference_for_alignment.remove_empty:
+
+                ## Indices of the empty spectra, determined once before anything is removed.
+                empty_spectra = findEmptyMassObjects(maldi_data)
+
+                ## Negative indexing with a zero-length vector selects NO rows, so
+                ## x[-integer(0),] would discard everything when no spectrum is empty.
+                ## Only subset when there is something to remove.
+                if (length(empty_spectra)>0){
+                #if $infile.ext == 'rdata'
+                    ## remove coordinates of empty spectra for Cardinal RData input;
+                    ## drop=FALSE keeps a matrix even if a single row remains
+                    cardinal_coordinates = cardinal_coordinates[-empty_spectra,,drop=FALSE]
+                #end if
+                #if str($tabular_annotation.load_annotation) == 'yes_annotation':
+                    ## remove coordinate annotations for empty spectra
+                    merged_annotation = merged_annotation[-empty_spectra,]
+                #end if
+                    maldi_data = removeEmptyMassObjects(maldi_data)
+                }
+            #end if
+        #end if
+
+        ## QC plot
+
+        if (length(maldi_data)>0){
+            avgSpectra = averageMassSpectra(maldi_data,method="mean")
+            plot(avgSpectra, main="Average spectrum after alignment")
+        }else{"All spectra are empty"}
+
+    #end if
+#end for
+
+## close the QC plot PDF
+dev.off()
+
+## Write the results. Without any remaining spectra nothing is exported and
+## only a message is emitted.
+if (length(maldi_data)==0){
+    "All spectra are empty, outputfiles will be empty,too."
+}else{
+    ## export imzML file; Cardinal RData input needs the saved coordinates passed explicitly
+    #if $infile.ext == 'rdata'
+        MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates)
+    #else
+        MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed)
+    #end if
+
+    ## export annotation tabular file (only when an annotation file was loaded)
+    #if str($tabular_annotation.load_annotation) == 'yes_annotation':
+        write.table(merged_annotation, file="$annotation_output", quote = FALSE, row.names = FALSE, col.names=TRUE, sep = "\t")
+    #end if
+}
+
+    ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <!-- MSI dataset to preprocess -->
+        <param name="infile" type="data" format="imzml,rdata" label="MS metadata" help="This file is in imzML format or Cardinal MSImageSet saved as RData"/>
+        <!-- Optional restriction of the import to a list of pixel coordinates -->
+        <conditional name="restriction_conditional">
+            <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files">
+                <option value="no_restriction" selected="True">Calculate on entire file</option>
+                <option value="restrict">Restrict to coordinates of interest</option>
+            </param>
+            <when value="restrict">
+                <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates which should be read" help="x-values in first column, y-values in second column"/>
+            </when>
+            <when value="no_restriction"/>
+        </conditional>
+        <!-- Optional pixel annotations that are kept in sync with the spectra -->
+        <conditional name="tabular_annotation">
+            <param name="load_annotation" type="select" label="Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed">
+                <option value="no_annotation" selected="True">use no annotation</option>
+                <option value="yes_annotation">use pixel annotation from a tabular file</option>
+            </param>
+            <when value="yes_annotation">
+                <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file"
+                    help="Tabular file with three columns: x values, y values and pixel annotations"/>
+                <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/>
+                <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/>
+                <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/>
+                <param name="tabular_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
+            </when>
+            <when value="no_annotation"/>
+        </conditional>
+        <!-- Preprocessing steps; they are applied in the order in which they are listed -->
+        <repeat name="methods" title="Method" min="1">
+            <conditional name="methods_conditional">
+                <param name="method" type="select" label="Select the method you want to apply">
+                    <option value="Transformation" selected="True">Transformation</option>
+                    <option value="Smoothing">Smoothing</option>
+                    <option value="Baseline">Baseline removal</option>
+                    <option value="Calibrate">Calibrate</option>
+                    <option value="Align">Align Spectra (warping/phase correction)</option>
+                    <validator type="empty_field" />
+                </param>
+                <when value="Transformation">
+                    <param name="transform_method" type="select" label="Select your transformation method">
+                        <option value="sqrt" selected="True">sqrt</option>
+                        <option value="log">log</option>
+                        <option value="log2">log2</option>
+                        <option value="log10">log10</option>
+                        <validator type="empty_field" />
+                    </param>
+                </when>
+                <when value="Smoothing">
+                    <conditional name="methods_for_smoothing">
+                        <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object">
+                            <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option>
+                            <option value="MovingAverage">MovingAverage</option>
+                        </param>
+                        <when value="SavitzkyGolay">
+                            <!-- integer instead of free text: the value is pasted unquoted into the R script -->
+                            <param name="polynomial" value="3" type="integer" label="PolynomialOrder argument to control the order of the filter"/>
+                        </when>
+                        <when value="MovingAverage">
+                            <param name="weighted" type="boolean" label="Weighted average" help="indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weights normalized to 1" truevalue="TRUE" falsevalue="FALSE"/>
+                        </when>
+                    </conditional>
+                    <param name="halfWindowSize" type="integer" value="10"
+                        label="Half window size"
+                        help="The resulting window reaches from 
+                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
+                            (window size is 2*halfWindowSize+1).
+                            The best size differs depending on the selected smoothing method."/>
+                </when>
+                <when value="Baseline">
+                    <param name="baseline_method" type="select" label="Baseline removal method">
+                        <option value="SNIP" selected="True">SNIP</option>
+                        <option value="TopHat">TopHat</option>
+                        <option value="ConvexHull">ConvexHull</option>
+                        <option value="median">median</option>
+                        <validator type="empty_field" />
+                    </param>
+                    <param name="iterations" type="integer" value="100"
+                        label="Number of iterations"
+                        help=""/>
+                </when>
+                <when value="Calibrate">
+                    <param name="calibrate_method" type="select" label="Calibration method">
+                        <option value="TIC" selected="True">TIC</option>
+                        <option value="PQN">PQN</option>
+                        <option value="median">median</option>
+                        <validator type="empty_field" />
+                    </param>
+                    <param name="mass_start" type="integer" value="0"
+                        label="Start of m/z range, has to be inside m/z range" 
+                        help="Scaling factor is calculated on the mass range and applied to the whole spectrum"/>
+                    <param name="mass_end" type="integer" value="0"
+                        label="End of m/z range, has to be inside m/z range" 
+                        help="Both the start and the end value need to be different from 0 to be taken into account."/>
+                </when>
+                <when value="Align">
+                    <param name="warping_method" type="select" label="Warping methods">
+                        <option value="lowess" selected="True">Lowess</option>
+                        <option value="linear">Linear</option>
+                        <option value="quadratic">Quadratic</option>
+                        <option value="cubic">Cubic</option>
+                    </param>
+
+                    <param name="tolerance" type="float" value="0.002"
+                        label="Tolerance"
+                        help="Double, maximal relative deviation of a peak position (m/z) to be considered as identical" />
+
+                    <param name="halfWindowSize" type="integer" value="20"
+                        label="Half window size"
+                        help="The resulting window reaches from 
+                            mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]
+                            (window size is 2*halfWindowSize+1).
+                            It is used for the peak detection that precedes the alignment."/>
+
+                    <param name="snr" type="integer" value="2"
+                        label="Signal-to-noise-ratio"
+                        help=""/>
+
+                    <conditional name="reference_for_alignment">
+                        <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration">
+                            <option value="no_reference" selected="True">no reference</option>
+                            <option value="yes_reference">reference from tabular file</option>
+                        </param>
+                        <when value="no_reference"/>
+                        <when value="yes_reference">
+                            <param name="reference_file" type="data" format="tabular"
+                                label="Tabular file with m/z of internal calibrants (MassPeaks) which should be used for spectra alignment"
+                                help="calibration of m/z values to internal calibrants, at least 2 m/z per spectrum are needed"/>
+                            <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/>
+                            <param name="empty_nomatch" type="boolean" label="logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/>
+                            <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE"/>
+                        </when>
+                    </conditional>
+                </when>
+            </conditional>
+        </repeat>
+        <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/>
+    </inputs>
+    <outputs>
+        <!-- imzML result: the imzml and ibd files are moved into the extra-files directory,
+             the primary file only holds a listing of that directory -->
+        <data name="outfile_imzml"
+              format="imzml"
+              label="$infile.display_name processed"/>
+        <!-- QC plots, picked up from the PDF the R script writes into the working directory -->
+        <data name="plots"
+              format="pdf"
+              from_work_dir="prepro_qc_plot.pdf"
+              label="$infile.display_name preprocessed QC"/>
+        <!-- Pixel annotations; only created when an annotation file was supplied -->
+        <data name="annotation_output"
+              format="tabular"
+              label="$infile.display_name annotations">
+            <filter>tabular_annotation["load_annotation"] == 'yes_annotation'</filter>
+        </data>
+    </outputs>
+    <tests>
+        <!-- "methods" is a repeat, so every method under test needs its own
+             repeat element; parameters given outside of one do not reach the repeat.
+             NOTE(review): the expected files may have been generated while the
+             method parameters were not applied; regenerate them if the tests fail. -->
+        <!-- Test 1: restricted imzML import, then transformation, smoothing and baseline removal -->
+        <test>
+            <param name="infile" value="" ftype="imzml">
+                <composite_data value="Example_Continuous.imzML"/>
+                <composite_data value="Example_Continuous.ibd"/>
+            </param>
+            <conditional name="restriction_conditional">
+                <param name="restriction" value="restrict"/>
+                <param name="coordinates_file" value="restricted_pixels.tabular"/>
+            </conditional>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="method" value="Transformation"/>
+                    <param name="transform_method" value="log2"/>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="method" value="Smoothing"/>
+                    <conditional name="methods_for_smoothing">
+                        <param name="smooth_method" value="SavitzkyGolay"/>
+                    </conditional>
+                </conditional>
+            </repeat>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="method" value="Baseline"/>
+                    <param name="baseline_method" value="TopHat"/>
+                </conditional>
+            </repeat>
+            <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/>
+            <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/>
+            <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/>
+        </test>
+        <!-- Test 2: Cardinal RData input with PQN intensity calibration -->
+        <test>
+            <param name="infile" value="msidata_1.RData" ftype="rdata"/>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="method" value="Calibrate"/>
+                    <param name="calibrate_method" value="PQN"/>
+                </conditional>
+            </repeat>
+            <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/>
+            <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/>
+            <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/>
+        </test>
+        <!-- Test 3: imzML input with pixel annotations, alignment to reference m/z and removal of empty spectra -->
+        <test>
+            <param name="infile" value="" ftype="imzml">
+                <composite_data value="Example_Continuous.imzML"/>
+                <composite_data value="Example_Continuous.ibd"/>
+            </param>
+            <conditional name="tabular_annotation">
+                <param name="load_annotation" value="yes_annotation"/>
+                <param name="annotation_file" value="pixel_annotations.tabular"/>
+                <param name="column_x" value="1"/>
+                <param name="column_y" value="2"/>
+                <param name="column_names" value="3"/>
+                <param name="tabular_header" value="TRUE"/>
+            </conditional>
+            <repeat name="methods">
+                <conditional name="methods_conditional">
+                    <param name="method" value="Align"/>
+                    <param name="warping_method" value="linear"/>
+                    <param name="halfWindowSize" value="1"/>
+                    <conditional name="reference_for_alignment">
+                        <param name="align_ref" value="yes_reference"/>
+                        <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/>
+                        <param name="allow_nomatch" value="TRUE"/>
+                        <param name="remove_empty" value="TRUE"/>
+                        <param name="empty_nomatch" value="TRUE"/>
+                    </conditional>
+                </conditional>
+            </repeat>
+            <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/>
+            <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/>
+            <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/>
+            <output name="annotation_output" file="annotations_output3.tabular"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+MALDIquant_ provides a complete analysis pipeline for MALDI-TOF and other mass spectrometry data. So far we have only implemented the functionalities for mass spectrometry imaging data.
+
+Input data: 
+
+- MSI data as imzML file (upload via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_
+- optional tabular file with pixel coordinates to restrict reading of imzML file to coordinates of interest
+
+Options:
+
+- Transformation: transformation of intensities with log, log2, log10 and square root
+- Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are SavitzkyGolay and Moving Average
+- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). Available methods are SNIP, TopHat, ConvexHull and median.
+- Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN)
+- Spectra alignment (warping): alignment for (re)calibration of m/z values
+
+
+Output: 
+
+- imzML file (imzML format can be continuous or processed)
+- pdf with average mass spectra after each preprocessing step
+
+.. _MALDIquant: http://strimmerlab.org/software/maldiquant/
+
+        ]]>
+    </help>
+    <expand macro="citation"/>
+</tool>