Mercurial > repos > galaxyp > maldi_quant_preprocessing
diff maldi_quant_preprocessing.xml @ 3:71411ac28268 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/MALDIquant commit f127be2141cf22e269c85282d226eb16fe14a9c1
author | galaxyp |
---|---|
date | Fri, 15 Feb 2019 10:26:00 -0500 |
parents | e754c2b545a9 |
children | 60ee8c592b13 |
line wrap: on
line diff
--- a/maldi_quant_preprocessing.xml Thu Oct 25 07:31:55 2018 -0400 +++ b/maldi_quant_preprocessing.xml Fri Feb 15 10:26:00 2019 -0500 @@ -1,671 +1,703 @@ -<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.2"> - <description> - Preprocessing of mass-spectrometry imaging data - </description> - <macros> - <import>maldi_macros.xml</import> - </macros> - <expand macro="requirements"/> - <command detect_errors="exit_code"> - <![CDATA[ - cat '${maldi_quant_preprocessing}' && - #if $infile.ext == 'imzml' - cp '${infile.extra_files_path}/imzml' infile.imzML && - cp '${infile.extra_files_path}/ibd' infile.ibd && - #elif $infile.ext == 'analyze75' - cp '${infile.extra_files_path}/hdr' infile.hdr && - cp '${infile.extra_files_path}/img' infile.img && - cp '${infile.extra_files_path}/t2m' infile.t2m && - du infile.hdr && - du infile.img && - du -s -B1 infile.hdr && - #else - ln -s $infile infile.RData && - #end if - Rscript "${maldi_quant_preprocessing}" && - - mkdir $outfile_imzml.files_path && - mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && - mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && - echo "imzML file:" > $outfile_imzml && - ls -l "$outfile_imzml.files_path" >> $outfile_imzml - ]]> - </command> - <configfiles> - <configfile name="maldi_quant_preprocessing"><![CDATA[ - -@R_IMPORTS@ - -#if $restriction_conditional.restriction == 'restrict': - - print('Reading mask region') - ## Import imzML file - - coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] - - maldi_data = importImzMl('infile.imzML', - coordinates = coordinate_matrix) - pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) - -#else: - - print('Reading entire file') - #if $infile.ext == 'imzml' - ## Import imzML file - maldi_data = import( 'infile.imzML', type="imzML" ) - coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) - #elif $infile.ext == 'analyze75' - ## Import analyze7.5 file - maldi_data = importAnalyze( 'infile.hdr' ) - coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) - #else - loadRData <- function(fileName){ - #loads an RData file, and returns it - load(fileName) - get(ls()[ls() != "fileName"]) - } - msidata = loadRData('infile.RData') - ## save coordinates - cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) - ## save mz values - cardinal_mzs = Cardinal::mz(msidata) - ## create MALDIquant MassSpectrum object - maldi_data = list() - for(number_spectra in 1:ncol(msidata)){ - maldi_data[[number_spectra]] = createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) - coordinates_info = cbind(cardinal_coordinates, c(1:length(maldi_data))) - } - - #end if - -#end if - -## Quality control plots during preprocessing - -pdf("prepro_qc_plot.pdf", fonts = "Times", pointsize = 12) -plot(0,type='n',axes=FALSE,ann=FALSE) - -## if no filename is given, name of file in Galaxy history is used - #set $filename = $infile.display_name -title(main=paste("$filename")) - -#if str($tabular_annotation.load_annotation) == 'yes_annotation': - print("use annotation file") - - ## read and extract x,y,annotation information - input_tabular = read.delim("$tabular_annotation.annotation_file", header = $tabular_annotation.tabular_header, stringsAsFactors = FALSE) - annotation_input = input_tabular[,c($tabular_annotation.column_x, $tabular_annotation.column_y, $tabular_annotation.column_names)] - colnames(annotation_input) = c("x", "y", "annotation") ## rename annotations header to default name "annotation" - - ## merge with coordinate information (from above) of MSI data - colnames(coordinates_info)[3] = "pixel_index" - merged_annotation = merge(coordinates_info, annotation_input, by=c("x", "y"), all.x=TRUE) - merged_annotation[is.na(merged_annotation)] = "NA" - merged_annotation = merged_annotation[order(merged_annotation\$pixel_index),] - samples = as.factor(merged_annotation\$annotation) - -## print annotation overview into PDF output - - ## the more annotation groups a file has the smaller will be the legend - number_combined = length(levels(as.factor(merged_annotation\$annotation))) - if (number_combined<20){ - legend_size = 10 - }else if (number_combined>20 && number_combined<40){ - legend_size = 9 - }else if (number_combined>40 && number_combined<60){ - legend_size = 8 - }else if (number_combined>60 && number_combined<100){ - legend_size = 7 - }else{ - legend_size = 6 - } - - combine_plot = ggplot(merged_annotation, aes(x=x, y=y, fill=annotation))+ - geom_tile() + - coord_fixed()+ - ggtitle("Spatial orientation of annotated data")+ - theme_bw()+ - theme(plot.title = element_text(hjust = 0.5))+ - theme(text=element_text(family="ArialMT", face="bold", size=12))+ - theme(legend.position="bottom",legend.direction="vertical")+ - theme(legend.key.size = unit(0.2, "line"), legend.text = element_text(size = legend_size))+ - guides(fill=guide_legend(ncol=5,byrow=TRUE)) - - print(combine_plot) - -#end if - -#################### Preprocessing methods ##################################### - -## QC plot on input file -avgSpectra = averageMassSpectra(maldi_data,method="mean") -plot(avgSpectra, main="Average spectrum for input file") - -pixel_number = length(maldi_data) -minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) -maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) -mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) -number_features = length(unique(unlist(lapply(maldi_data,mass)))) -medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) -inputdata = c(minmz, maxmz,number_features,mean_features, medint) -QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint)) -vectorofactions = "inputdata" - - -#for $method in $methods: - - #if str( $method.methods_conditional.method ) == 'Transformation': - - print('transforming') - ##transformation - maldi_data = transformIntensity(maldi_data, method="$method.methods_conditional.transform_method") - ## QC plot and numbers - avgSpectra = averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum after transformation") - pixel_number = length(maldi_data) - minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) - maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) - medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - number_features = length(unique(unlist(lapply(maldi_data,mass)))) - transformed = c(minmz, maxmz,number_features,mean_features, medint) - QC_numbers= cbind(QC_numbers, transformed) - vectorofactions = append(vectorofactions, "transformed") - - - #elif str( $method.methods_conditional.method ) == 'Smoothing': - - print('smoothing') - ##smoothing - - #if str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'SavitzkyGolay': - print('SavitzkyGolay') - - maldi_data = smoothIntensity(maldi_data, - method="SavitzkyGolay", polynomialOrder=$method.methods_conditional.methods_for_smoothing.polynomial, - halfWindowSize=$method.methods_conditional.halfWindowSize) - - #elif str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'MovingAverage': - print('MovingAverage') - - maldi_data = smoothIntensity(maldi_data, - method="MovingAverage", weighted=$method.methods_conditional.methods_for_smoothing.weighted, - halfWindowSize=$method.methods_conditional.halfWindowSize) - - #end if - - ## QC plot and numbers - avgSpectra = averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum after smoothing", sub="") - pixel_number = length(maldi_data) - minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) - maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) - medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - number_features = length(unique(unlist(lapply(maldi_data,mass)))) - smoothed = c(minmz, maxmz,number_features,mean_features, medint) - QC_numbers= cbind(QC_numbers, smoothed) - vectorofactions = append(vectorofactions, "smoothed") - - - #elif str( $method.methods_conditional.method ) == 'Baseline': - - print('baseline removing') - ## Remove baseline - - #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP': - print('SNIP') - random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) - par(mfrow = c(2,2)) - for (random_sample in random_spectra){ - maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], - method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations) - plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) - lines(maldi_data_baseline, col="blue", lwd=2)} - - maldi_data = removeBaseline(maldi_data, - method="SNIP", - iterations=$method.methods_conditional.methods_for_baseline.iterations) - - #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat': - print('TopHat') - - maldi_data = removeBaseline(maldi_data, - method="TopHat", - halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) - - #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull': - print('ConvexHull') - - maldi_data = removeBaseline(maldi_data, - method="ConvecHull") - - #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median': - print('median') - - maldi_data = removeBaseline(maldi_data, - method="TopHat", - halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) - - #end if - - ## QC plot and numbers - par(mfrow = c(1,1)) - avgSpectra = averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum after baseline removal") - pixel_number = length(maldi_data) - minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) - maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) - medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - number_features = length(unique(unlist(lapply(maldi_data,mass)))) - baseline_removed = c(minmz, maxmz,number_features,mean_features, medint) - QC_numbers= cbind(QC_numbers, baseline_removed) - vectorofactions = append(vectorofactions, "baseline_removed") - - - #elif str( $method.methods_conditional.method ) == 'Calibrate': - - print('calibrate') - ##calibrate - - #if str($method.methods_conditional.cond_calibration_range) == "yes": - ## calibrate only given m/z range - maldi_data = calibrateIntensity(maldi_data, - method="$method.methods_conditional.calibrate_method", - range=c($method.methods_conditional.cond_calibration_range.mass_start, $method.methods_conditional.cond_calibration_range.mass_end)) - #else: - maldi_data = calibrateIntensity(maldi_data, - method="$method.methods_conditional.calibrate_method") - #end if - ## QC plot and numbers - avgSpectra = averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum after normalization") - pixel_number = length(maldi_data) - minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) - maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) - medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - number_features = length(unique(unlist(lapply(maldi_data,mass)))) - intensity_calibrated = c(minmz, maxmz,number_features,mean_features, medint) - QC_numbers= cbind(QC_numbers, intensity_calibrated) - vectorofactions = append(vectorofactions, "intensity_calibrated ") - - - #elif str( $method.methods_conditional.method ) == 'Align': - - print('align') - ##align spectra - - #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': - maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, - SNR=$method.methods_conditional.snr, tolerance=$method.methods_conditional.tolerance, - allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch, - warpingMethod="$method.methods_conditional.warping_method") - - #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': - ## create reference mass_vector from tabular file - mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,1] - int_vector = rep(1,length(mass_vector)) - mass_list = createMassPeaks(mass_vector, int_vector) - - maldi_data = alignSpectra(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, - SNR=$method.methods_conditional.snr, - tolerance=$method.methods_conditional.tolerance, - warpingMethod="$method.methods_conditional.warping_method", - reference = mass_list, allowNoMatches =$method.methods_conditional.allow_nomatch, emptyNoMatches = $method.methods_conditional.empty_nomatch) - - #end if - - #if $method.methods_conditional.remove_empty: - print("remove empty spectra") - - #if $infile.ext == 'rdata' - cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),] ## remove coordinates of empty spectra for Cardinal RData input - #end if - #if str($tabular_annotation.load_annotation) == 'yes_annotation': - merged_annotation = merged_annotation[-findEmptyMassObjects(maldi_data),] ## remove coordinate annotations for empty spectra - #end if - maldi_data = removeEmptyMassObjects(maldi_data) - #end if - - - ## QC plot - - if (length(maldi_data)>0){ - avgSpectra = averageMassSpectra(maldi_data,method="mean") - plot(avgSpectra, main="Average spectrum after alignment") - }else{"All spectra are empty"} - - pixel_number = length(maldi_data) - minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) - maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) - mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) - medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) - number_features = length(unique(unlist(lapply(maldi_data,mass)))) - spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint) - QC_numbers= cbind(QC_numbers, spectra_aligned) - vectorofactions = append(vectorofactions, "spectra_aligned") - #end if - -#end for - -rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# features", "median\nintensity") -plot(0,type='n',axes=FALSE,ann=FALSE) -grid.table(t(QC_numbers)) - -dev.off() - -## export imzML file -if (length(maldi_data)>0){ - #if $infile.ext == 'rdata' - MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) - #else - MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed) - #end if - -}else{"All spectra are empty, outputfiles will be empty,too."} - - ]]> - </configfile> - </configfiles> - <inputs> - <param name="infile" type="data" format="imzml,rdata,analyze75" label="Inputfile as imzML or Cardinal MSImageSet saved as RData" help="This file is in imzML format or Cardinal MSImageSet saved as RData. The file must be in profile mode, not centroided"/> - <conditional name="restriction_conditional"> - <param name="restriction" type="select" label="Read in only spectra of interest" help="This option only works for imzML files"> - <option value="no_restriction" selected="True">Calculate on entire file</option> - <option value="restrict">Restrict to coordinates of interest</option> - </param> - <when value="restrict"> - <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates" help="x-values in first column, y-values in second column"/> - <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> - </when> - <when value="no_restriction"/> - </conditional> - <conditional name="tabular_annotation"> - <param name="load_annotation" type="select" label="For Cardinal RData only: Use pixel annotation from tabular file to have updated annotation information in case empty spectra will be removed"> - <option value="no_annotation" selected="True">use no annotation</option> - <option value="yes_annotation">use pixel annotation from a tabular file</option> - </param> - <when value="yes_annotation"> - <param name="annotation_file" type="data" format="tabular" label="Use annotations from tabular file" - help="Tabular file with three columns: x values, y values and pixel annotations"/> - <param name="column_x" data_ref="annotation_file" label="Column with x values" type="data_column"/> - <param name="column_y" data_ref="annotation_file" label="Column with y values" type="data_column"/> - <param name="column_names" data_ref="annotation_file" label="Column with pixel annotations" type="data_column"/> - <param name="tabular_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> - </when> - <when value="no_annotation"/> - </conditional> - <repeat name="methods" title="Method" min="1"> - <conditional name="methods_conditional"> - <param name="method" type="select" label="Select a method"> - <option value="Transformation" selected="True">Transformation</option> - <option value="Smoothing">Smoothing</option> - <option value="Baseline">Baseline removal</option> - <option value="Calibrate">Calibrate</option> - <option value="Align">Align Spectra (warping/phase correction)</option> - <validator type="empty_field" /> - </param> - <when value="Transformation"> - <param name="transform_method" type="select" label="Select a transfprormation method"> - <option value="sqrt" selected="True">sqrt</option> - <option value="log">log</option> - <option value="log2">log2</option> - <option value="log10">log10</option> - <validator type="empty_field" /> - </param> - </when> - <when value="Smoothing"> - <conditional name="methods_for_smoothing"> - <param name="smooth_method" type="select" label="This method smoothes the intensity values of a MassSpectrum object"> - <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option> - <option value="MovingAverage">MovingAverage</option> - </param> - <when value="SavitzkyGolay"> - <param name="polynomial" value="3" type="text" label="PolynomialOrder argument to control the order of the filter" - help="should be smaller than the resulting window"/> - </when> - <when value="MovingAverage"> - <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> - </when> - </conditional> - <param name="halfWindowSize" type="integer" value="10" - label="Half window size (number of data points)" - help="The resulting window reaches from - mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] - (window size is 2*halfWindowSize+1)."/> - </when> - <when value="Baseline"> - <conditional name="methods_for_baseline"> - <param name="baseline_method" type="select" label="Baseline removal method"> - <option value="SNIP" selected="True">SNIP</option> - <option value="TopHat">TopHat</option> - <option value="ConvexHull">ConvexHull</option> - <option value="median">median</option> - <validator type="empty_field" /> - </param> - <when value="SNIP"> - <param name="iterations" type="integer" value="100" - label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/> - </when> - <when value="TopHat"> - <param name="tophat_halfWindowSize" type="integer" value="10" - label="Half window size (number of data points)" - help="The resulting window reaches from - mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> - </when> - <when value="ConvexHull"/> - <when value="median"> - <param name="median_halfWindowSize" type="integer" value="10" - label="Half window size (number of data points)" - help="The resulting window reaches from - mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> - </when> - </conditional> - </when> - <when value="Calibrate"> - <param name="calibrate_method" type="select" label="Intensity calibration (normalization) method"> - <option value="TIC" selected="True">TIC</option> - <option value="PQN">PQN</option> - <option value="median">median</option> - <validator type="empty_field" /> - </param> - <conditional name="cond_calibration_range"> - <param name="calibration_range" type="select" label="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor"> - <option value="no" selected="True">complete m/z range</option> - <option value="yes">specify a m/z range</option> - </param> - <when value="no"/> - <when value="yes"> - <param name="mass_start" type="integer" value="800" - label="Start of m/z range, has to be inside m/z range" - help="Scaling factor is calculated on the mass range and applied to the whole spectrum."/> - <param name="mass_end" type="integer" value="3000" - label="End of m/z range, has to be inside m/z range"/> - </when> - </conditional> - </when> - <when value="Align"> - <param name="warping_method" type="select" label="Warping methods"> - <option value="lowess" selected="True">Lowess</option> - <option value="linear">Linear</option> - <option value="quadratic">Quadratic</option> - <option value="cubic">Cubic</option> - </param> - - <param name="tolerance" type="float" value="0.00005" - label="Tolerance = abs(mz1 - mz2)/mz2" - help="Maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 50e-6" /> - - <param name="halfWindowSize" type="integer" value="20" - label="Half window size (number of data points)" - help="The resulting window reaches from - mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] - (window size is 2*halfWindowSize+1). - The best size differs depending on the selected smoothing method."/> - - <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> - <param name="allow_nomatch" type="boolean" label="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> - <param name="empty_nomatch" type="boolean" label="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> - <param name="remove_empty" type="boolean" label="Should empty spectra be removed" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> - - <conditional name="reference_for_alignment"> - <param name="align_ref" type="select" label="Reference to which the samples should be aligned" help="Use internal calibrants to perform m/z calibration"> - <option value="no_reference" selected="True">no reference</option> - <option value="yes_reference">reference from tabular file</option> - </param> - <when value="no_reference"/> - <when value="yes_reference"> - <param name="reference_file" type="data" format="tabular" - label="Tabular file with m/z (MassPeaks) which should be used for spectra alignment" - help="At least 2 reference m/z per spectrum are needed"/> - <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> - </when> - </conditional> - </when> - </conditional> - </repeat> - <param name="export_processed" type="boolean" label="Export file as processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> - </inputs> - <outputs> - <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> - <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> - </outputs> - <tests> - <test> - <param name="infile" value="" ftype="imzml"> - <composite_data value="Example_Continuous.imzML"/> - <composite_data value="Example_Continuous.ibd"/> - </param> - <conditional name="restriction_conditional"> - <param name="restriction" value="restrict"/> - <param name="coordinates_file" value="restricted_pixels.tabular"/> - </conditional> - <conditional name="methods_conditional"> - <param name="method" value="Transformation"/> - <param name="transform_method" value="log2"/> - <param name="method" value="Smoothing"/> - <param name="smooth_method" value="SavitzkyGolay"/> - <param name="method" value="Basline"/> - <param name="baseline_method" value ="TopHat"/> - </conditional> - <output name="outfile_imzml" file="outfile1.imzML" compare="sim_size"/> - <output name="outfile_imzml" file="outfile1.ibd" compare="sim_size"/> - <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/> - </test> - <test> - <param name="infile" value="msidata_1.RData" ftype="rdata"/> - <conditional name="methods_conditional"> - <param name="method" value="Calibrate"/> - <param name="calibrate_method" value="PQN"/> - </conditional> - <output name="outfile_imzml" file="outfile2.imzML" compare="sim_size"/> - <output name="outfile_imzml" file="outfile2.ibd" compare="sim_size"/> - <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/> - </test> - <test> - <param name="infile" value="" ftype="imzml"> - <composite_data value="Example_Continuous.imzML"/> - <composite_data value="Example_Continuous.ibd"/> - </param> - <conditional name="tabular_annotation"> - <param name="load_annotation" value="yes_annotation"/> - <param name="annotation_file" value="pixel_annotations.tabular"/> - <param name="column_x" value="1"/> - <param name="column_y" value="2"/> - <param name="column_names" value="3"/> - <param name="tabular_header" value="TRUE"/> - </conditional> - <conditional name="methods_conditional"> - <param name="method" value="Align"/> - <param name="warping_method" value="linear"/> - <param name="halfWindowSize" value="1"/> - <param name="tolerance" value="0.002"/> - <param name="allow_nomatch" value="TRUE"/> - <param name="remove_empty" value="TRUE"/> - <param name="empty_nomatch" value="TRUE"/> - <conditional name="reference_for_alignment"> - <param name="align_ref" value="yes_reference"/> - <param name="reference_file" value="align_reference_test2.tabular" ftype="tabular"/> - </conditional> - </conditional> - <output name="outfile_imzml" file="outfile3.imzML" compare="sim_size"/> - <output name="outfile_imzml" file="outfile3.ibd" compare="sim_size"/> - <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> - </test> - </tests> - <help><![CDATA[ - -@MADLI_QUANT_DESCRIPTION@ - ------ - -**Input data** - -- MSI data: 2 types of input data can be used: - - - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ - - Cardinal "MSImageSet" data saved as .RData -- Only for Cardinal RData files and when remove empty spectra is chosen: Tabular file with coordinates annotations. Separate columns for x and y coordinates and a third column with pixel annotations. Tabular files with any header name or no header at all are supported -- Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. The file has to contain x values in the first column and y values in the second columns. Further columns are allowed. Tabular files with any header name or no header at all are supported. - - :: - - x_coord y_coord - 1 1 - 2 1 - 3 1 - ... - ... - -- Optional: Tabular file with reference m/z for the spectra align function. At least 2 m/z values of the input list must be present in every spectrum to peform the alignment. First column must contain m/z values, without empty fields or letters. Tabular files with any header name or no header at all are supported. - - :: - - - m/z - 100.0 - 100.01 - 100.02 - ... - ... - - - -**Options** - -- Transformation: Variance stabilization through intensity transformation:'log', 'log2', 'log10' and 'squareroot' (sqrt) are available -- Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average' - - - For all smoothing methods: The larger the 'Half window size'f, the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay. - - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center. - - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0. - -- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). - - - Available methods are SNIP, TopHat,ConvexHull and median: - - SNIP is the default baseline reduction method in MALDIquant. - - ConvexHull is not appropriate for MALDI-TOF baseline removal. - - The moving median may generate negative intensities. - - Except for the ConvexHull all methods have a parameter for the 'Half window size' (in SNIP it is called 'iterations'). The smaller the window the more baseline will be removed but also parts of the peaks. Wider windows preserve the peak height better and produce a smoother baseline, but some local background variation will remain. - -- Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN) - - - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum) - - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum - -- Spectra alignment (warping): alignment for (re)calibration of m/z values, at least two m/z per spectrum are needed for the alignment. This requirement can be skipped by setting "Don't throw an error when less than 2 reference m/z were found in a spectrum" to yes. If the not aligned spectra should be set to zero select yes in "logical, if TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes. - - -**Output** - -- imzML file (imzML format can be continuous or processed) -- PDF with average mass spectra after each preprocessing step - -.. _MALDIquant: http://strimmerlab.org/software/maldiquant/ - - ]]> - </help> - <expand macro="citation"/> -</tool> +<tool id="maldi_quant_preprocessing" name="MALDIquant preprocessing" version="@VERSION@.3"> + <description> + Preprocessing of mass-spectrometry imaging data + </description> + <macros> + <import>maldi_macros.xml</import> + </macros> + <expand macro="requirements"/> + <command detect_errors="exit_code"> + <![CDATA[ + cat '${maldi_quant_preprocessing}' && + #if $infile.ext == 'imzml' + cp '${infile.extra_files_path}/imzml' infile.imzML && + cp '${infile.extra_files_path}/ibd' infile.ibd && + #elif $infile.ext == 'analyze75' + cp '${infile.extra_files_path}/hdr' infile.hdr && + cp '${infile.extra_files_path}/img' infile.img && + cp '${infile.extra_files_path}/t2m' infile.t2m && + du infile.hdr && + du infile.img && + du -s -B1 infile.hdr && + #else + ln -s $infile infile.RData && + #end if + Rscript "${maldi_quant_preprocessing}" && + + mkdir $outfile_imzml.files_path && + mv ./out.imzMl "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && + mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && + echo "imzML file:" > $outfile_imzml && + ls -l "$outfile_imzml.files_path" >> $outfile_imzml + ]]> + </command> + <configfiles> + <configfile name="maldi_quant_preprocessing"><![CDATA[ + +@R_IMPORTS@ + +#if $restriction_conditional.restriction == 'restrict': + + print('Reading mask region') + ## Import imzML file + + coordinate_matrix = as.matrix(read.delim("$restriction_conditional.coordinates_file", header = $restriction_conditional.coordinates_header, stringsAsFactors = FALSE))[,1:2] + coordinate_matrix = coordinate_matrix[,c($restriction_conditional.column_x, $restriction_conditional.column_y)] + + maldi_data = importImzMl('infile.imzML', + coordinates = coordinate_matrix) + pixelnames = paste0("x = ", coordinates(maldi_data)[,1],", y = ", coordinates(maldi_data)[,2]) + +#else: + + print('Reading entire file') + #if $infile.ext == 'imzml' + ## Import imzML file + maldi_data = import( 'infile.imzML', type="imzML" ) + coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) + #elif $infile.ext == 'analyze75' + ## Import analyze7.5 file + maldi_data = importAnalyze( 'infile.hdr' ) + coordinates_info = cbind(coordinates(maldi_data)[,1:2], c(1:length(maldi_data))) + #else + loadRData <- function(fileName){ + ##loads an RData file, and returns it + load(fileName) + get(ls()[ls() != "fileName"]) + } + msidata = loadRData('infile.RData') + ## save coordinates + cardinal_coordinates = as.matrix(Cardinal::coord(msidata)[,1:2]) + ## save mz values + cardinal_mzs = Cardinal::mz(msidata) + ## create MALDIquant MassSpectrum object, order of pixels in iData is same as in coord(msidata): + maldi_data = list() + for(number_spectra in 1:ncol(msidata)){ + maldi_data[[number_spectra]] = MALDIquant::createMassSpectrum(mass = cardinal_mzs, intensity = iData(msidata)[,number_spectra]) + } + #end if + +#end if + +## Quality control plots during preprocessing + +pdf("prepro_qc_plot.pdf", fonts = "Times", pointsize = 12) +plot(0,type='n',axes=FALSE,ann=FALSE) + +## if no filename is given, name of file in Galaxy history is used + #set $filename = $infile.display_name +title(main=paste("$filename")) + + +#################### Preprocessing methods ##################################### + +## QC plot on input file +avgSpectra = averageMassSpectra(maldi_data,method="mean") +plot(avgSpectra, main="Average spectrum for input file") + +pixel_number = length(maldi_data) +minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) +maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) +mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) +number_features = length(unique(unlist(lapply(maldi_data,mass)))) +medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) +inputdata = c(minmz, maxmz,number_features,mean_features,medint, pixel_number) +QC_numbers= data.frame(inputdata = c(minmz, maxmz,number_features, mean_features, medint, pixel_number)) +vectorofactions = "inputdata" + + +#for $method in $methods: + + #if str( $method.methods_conditional.method ) == 'Transformation': + + print('transforming') + ##transformation + maldi_data = transformIntensity(maldi_data, method="$method.methods_conditional.transform_method") + ## QC plot and numbers + avgSpectra = averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum after transformation") + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + transformed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) + QC_numbers= cbind(QC_numbers, transformed) + vectorofactions = append(vectorofactions, "transformed") + + + #elif str( $method.methods_conditional.method ) == 'Smoothing': + + print('smoothing') + ##smoothing + + #if str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'SavitzkyGolay': + print('SavitzkyGolay') + + maldi_data = smoothIntensity(maldi_data, + method="SavitzkyGolay", polynomialOrder=$method.methods_conditional.methods_for_smoothing.polynomial, + halfWindowSize=$method.methods_conditional.halfWindowSize) + + #elif str($method.methods_conditional.methods_for_smoothing.smooth_method ) == 'MovingAverage': + print('MovingAverage') + + maldi_data = smoothIntensity(maldi_data, + method="MovingAverage", weighted=$method.methods_conditional.methods_for_smoothing.weighted, + halfWindowSize=$method.methods_conditional.halfWindowSize) + + #end if + + ## QC plot and numbers + avgSpectra = averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum after smoothing", sub="") + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + smoothed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) + QC_numbers= cbind(QC_numbers, smoothed) + vectorofactions = append(vectorofactions, "smoothed") + + + #elif str( $method.methods_conditional.method ) == 'Baseline': + + print('baseline removing') + ## Remove baseline + + ## Choose random spectra for QC plots + random_spectra = sample(1:length(maldi_data), 4, replace=FALSE) + + #if str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'SNIP': + print('SNIP') + + par(mfrow = c(2,2)) + for (random_sample in random_spectra){ + maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], + method="SNIP", iterations=$method.methods_conditional.methods_for_baseline.iterations) + plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) + lines(maldi_data_baseline, col="blue", lwd=2)} + + maldi_data = removeBaseline(maldi_data, + method="SNIP", + iterations=$method.methods_conditional.methods_for_baseline.iterations) + + + #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'TopHat': + print('TopHat') + + par(mfrow = c(2,2)) + for (random_sample in random_spectra){ + maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], + method="TopHat", halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) + plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) + lines(maldi_data_baseline, col="blue", lwd=2)} + + maldi_data = removeBaseline(maldi_data, + method="TopHat", + halfWindowSize=$method.methods_conditional.methods_for_baseline.tophat_halfWindowSize) + + + #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'ConvexHull': + print('ConvexHull') + + par(mfrow = c(2,2)) + for (random_sample in random_spectra){ + maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], + method="ConvexHull") + plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) + lines(maldi_data_baseline, col="blue", lwd=2)} + + maldi_data = removeBaseline(maldi_data, + method="ConvexHull") + + + #elif str($method.methods_conditional.methods_for_baseline.baseline_method ) == 'median': + print('median') + + par(mfrow = c(2,2)) + for (random_sample in random_spectra){ + maldi_data_baseline = estimateBaseline(maldi_data[[random_sample]], + method="median", halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) + plot(maldi_data[[random_sample]], sub="", main=paste0("Estimated baseline for spectrum ", random_sample)) + lines(maldi_data_baseline, col="blue", lwd=2)} + + maldi_data = removeBaseline(maldi_data, + method="median", + halfWindowSize=$method.methods_conditional.methods_for_baseline.median_halfWindowSize) + + #end if + + ## QC plot and numbers + par(mfrow = c(1,1)) + avgSpectra = averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum after baseline removal") + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + baseline_removed = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) + QC_numbers= cbind(QC_numbers, baseline_removed) + vectorofactions = append(vectorofactions, "bl_removed") + + + #elif str( $method.methods_conditional.method ) == 'Calibrate': + + print('calibrate') + ##calibrate + + #if str($method.methods_conditional.cond_calibration_range) == "yes": + ## calibrate only given m/z range + maldi_data = calibrateIntensity(maldi_data, + method="$method.methods_conditional.calibrate_method", + range=c($method.methods_conditional.cond_calibration_range.mass_start, $method.methods_conditional.cond_calibration_range.mass_end)) + #else: + maldi_data = calibrateIntensity(maldi_data, + method="$method.methods_conditional.calibrate_method") + #end if + ## QC plot and numbers + avgSpectra = averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum after normalization") + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + intensity_calibrated = c(minmz, maxmz,number_features,mean_features,medint,pixel_number) + QC_numbers= cbind(QC_numbers, intensity_calibrated) + vectorofactions = append(vectorofactions, "calibrated") + + + #elif str( $method.methods_conditional.method ) == 'Align': + + print('align') + ##align spectra with 3 separate functions + + ## create reference if needed + + ## 1) detect peaks: + peaks <- detectPeaks(maldi_data, halfWindowSize=$method.methods_conditional.halfWindowSize, + method="$method.methods_conditional.peak_method", SNR=$method.methods_conditional.snr) + + + #if str($method.methods_conditional.reference_for_alignment.align_ref) == 'no_reference': + + ## 2) calculate warping: + warping_function <- determineWarpingFunctions(peaks, + tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", + allowNoMatches=$method.methods_conditional.allow_nomatch, minFrequency = $method.methods_conditional.reference_for_alignment.min_frequency) + + ## 3) warp spectra: + maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) + + + #elif str($method.methods_conditional.reference_for_alignment.align_ref) == 'yes_reference': + + ## create reference mass_vector from tabular file + mass_vector = read.delim("$method.methods_conditional.reference_for_alignment.reference_file", header = $method.methods_conditional.reference_for_alignment.reference_header, stringsAsFactors = FALSE)[,$method.methods_conditional.reference_for_alignment.mz_column] + int_vector = rep(1,length(mass_vector)) + mass_list = createMassPeaks(mass_vector, int_vector) + + #if str($method.methods_conditional.reference_for_alignment.separate_alignment) == "FALSE" + print('default alignment') + + ## 2) calculate warping: + warping_function <- determineWarpingFunctions(peaks, + tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", + allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) + + ## 3) warp spectra: + maldi_data = warpMassSpectra(maldi_data, warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) + + #elif str($method.methods_conditional.reference_for_alignment.separate_alignment) == "TRUE" + print('spectra wise alignment') + + maldi_data_new_list =list() + + for (pixelnb in 1:length(peaks)) + { + ## 2) calculate warping: + warping_function <- determineWarpingFunctions(peaks[[pixelnb]], + tolerance=$method.methods_conditional.tolerance, method="$method.methods_conditional.warping_method", + allowNoMatches=$method.methods_conditional.allow_nomatch, reference = mass_list) + + ## 3) warp spectra: + maldi_data_new = warpMassSpectra(list(maldi_data[[pixelnb]]), warping_function, emptyNoMatches=$method.methods_conditional.empty_nomatch) + maldi_data_new_list = c(maldi_data_new_list, maldi_data_new) + + } + maldi_data = maldi_data_new_list + #end if + #end if + + + #if $method.methods_conditional.remove_empty: + print(paste(length(findEmptyMassObjects(maldi_data)), " empty spectra were removed", sep=" ")) + + ## only if there are empty spectra to remove + + if (length(findEmptyMassObjects(maldi_data))>0) + + { + #if $infile.ext == 'rdata' + cardinal_coordinates = cardinal_coordinates[-findEmptyMassObjects(maldi_data),,drop=FALSE] ## remove coordinates of empty spectra for Cardinal RData input + #end if + + maldi_data = removeEmptyMassObjects(maldi_data) + } + #end if + + ## QC plot + + if (length(maldi_data)>0){ + avgSpectra = averageMassSpectra(maldi_data,method="mean") + plot(avgSpectra, main="Average spectrum after alignment") + medint = round(median(unlist(lapply(maldi_data,intensity))), digits=2) + }else{print("All spectra are empty")} + + pixel_number = length(maldi_data) + minmz = round(min(unlist(lapply(maldi_data,mass))), digits=4) + maxmz = round(max(unlist(lapply(maldi_data,mass))), digits=4) + mean_features = round(length(unlist(lapply(maldi_data,mass)))/length(maldi_data), digits=2) + + number_features = length(unique(unlist(lapply(maldi_data,mass)))) + spectra_aligned = c(minmz, maxmz,number_features,mean_features, medint,pixel_number) + QC_numbers= cbind(QC_numbers, spectra_aligned) + vectorofactions = append(vectorofactions, "aligned") + + #elif str( $method.methods_conditional.method ) == 'skip_preprocessing': + ##for now as option to filter large files + + #end if + +#end for + +rownames(QC_numbers) = c("min m/z", "max mz", "# features", "median \n# peaks (int.>0)", "median\nintensity", "pixel\nnumber") +colnames(QC_numbers) = vectorofactions +plot(0,type='n',axes=FALSE,ann=FALSE) +grid.table(t(QC_numbers)) + +dev.off() + +## export imzML file +if (length(maldi_data)>0){ + #if $infile.ext == 'rdata' + MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed, coordinates=cardinal_coordinates) + #else + MALDIquantForeign::exportImzMl(maldi_data, file="out.imzMl", processed=$export_processed) + #end if + +}else{"All spectra are empty, outputfiles will be empty,too."} + + ]]> + </configfile> + </configfiles> + <inputs> + <param name="infile" type="data" format="imzml,rdata,analyze75" label="MSI data" help="Input file as imzML (composite upload), or Cardinal MSImageSet saved as RData (regular upload). The file must be in profile mode, not centroided."/> + <conditional name="restriction_conditional"> + <param name="restriction" type="select" label="Use only spectra of interest" help="This option only works for imzML files"> + <option value="no_restriction" selected="True">No, calculate on entire file</option> + <option value="restrict">Yes, restrict to spectra of interest</option> + </param> + <when value="restrict"> + <param name="coordinates_file" type="data" format="tabular" label="Tabular file with coordinates"/> + <param name="column_x" data_ref="coordinates_file" label="Column with x values" type="data_column"/> + <param name="column_y" data_ref="coordinates_file" label="Column with y values" type="data_column"/> + <param name="coordinates_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + </when> + <when value="no_restriction"/> + </conditional> + <repeat name="methods" title="Method" min="1"> + <conditional name="methods_conditional"> + <param name="method" type="select" label="Select a method"> + <option value="Transformation" selected="True">Transformation</option> + <option value="Smoothing">Smoothing</option> + <option value="Baseline">Baseline removal</option> + <option value="Calibrate">Intensity calibration (normalization)</option> + <option value="Align">Align spectra (warping/phase correction)</option> + <option value="skip_preprocessing">Skip preprocessing</option> + <validator type="empty_field" /> + </param> + <when value="Transformation"> + <param name="transform_method" type="select" label="Transformation method"> + <option value="sqrt" selected="True">sqrt</option> + <option value="log">log</option> + <option value="log2">log2</option> + <option value="log10">log10</option> + <validator type="empty_field" /> + </param> + </when> + <when value="Smoothing"> + <conditional name="methods_for_smoothing"> + <param name="smooth_method" type="select" label="Smoothing method" help="This method smoothes the intensity values of a MassSpectrum object."> + <option value="SavitzkyGolay" selected="True">SavitzkyGolay</option> + <option value="MovingAverage">MovingAverage</option> + </param> + <when value="SavitzkyGolay"> + <param name="polynomial" value="3" type="text" label="Polynomial order" + help="Controls the order of the filter, should be smaller than the resulting window."/> + </when> + <when value="MovingAverage"> + <param name="weighted" type="boolean" label="Weighted average" help = "Indicates if the average should be equal weight or if it should have weights depending on the distance from the center as calculated as 1/2^abs(-halfWindowSize:halfWindowSize) with the sum of all weigths normalized to 1" truevalue="TRUE" falsevalue="FALSE"/> + </when> + </conditional> + <param name="halfWindowSize" type="integer" value="10" + label="Half window size" + help="Number of data points, the resulting window reaches from + mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] + (window size is 2*halfWindowSize+1)."/> + </when> + <when value="Baseline"> + <conditional name="methods_for_baseline"> + <param name="baseline_method" type="select" label="Baseline removal method"> + <option value="SNIP" selected="True">SNIP</option> + <option value="TopHat">TopHat</option> + <option value="ConvexHull">ConvexHull</option> + <option value="median">median</option> + <validator type="empty_field" /> + </param> + <when value="SNIP"> + <param name="iterations" type="integer" value="100" + label="Number of iterations" help="Corresponds to half window size: The resulting window reaches from mass[cur_index-iterations] to mass[cur_index+iterations]"/> + </when> + <when value="TopHat"> + <param name="tophat_halfWindowSize" type="integer" value="10" + label="Half window size" + help="Number of data points, the resulting window reaches from + mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> + </when> + <when value="ConvexHull"/> + <when value="median"> + <param name="median_halfWindowSize" type="integer" value="10" + label="Half window size" + help="Number of data points, the resulting window reaches from + mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize]"/> + </when> + </conditional> + </when> + <when value="Calibrate"> + <param name="calibrate_method" type="select" label="Intensity calibration method" help="Intensity normalization"> + <option value="TIC" selected="True">TIC</option> + <option value="PQN">PQN</option> + <option value="median">median</option> + <validator type="empty_field" /> + </param> + <conditional name="cond_calibration_range"> + <param name="calibration_range" type="select" label="m/z range" help="Instead of the whole m/z range, a specified m/z range can be used to calculate the scaling factor."> + <option value="no" selected="True">complete m/z range</option> + <option value="yes">specify a m/z range</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="mass_start" type="integer" value="800" + label="Start of m/z range, has to be inside m/z range" + help="Scaling factor is calculated on the mass range and applied to the whole spectrum."/> + <param name="mass_end" type="integer" value="3000" + label="End of m/z range, has to be inside m/z range"/> + </when> + </conditional> + </when> + <when value="Align"> + <param name="warping_method" type="select" label="Alignment method"> + <option value="lowess" selected="True">Lowess</option> + <option value="linear">Linear</option> + <option value="quadratic">Quadratic</option> + <option value="cubic">Cubic</option> + </param> + + <param name="tolerance" type="float" value="0.00005" + label="Tolerance" + help="abs(mz1 - mz2)/mz2, maximal relative deviation of a peak position (m/z) to be considered as identical. For 50ppm use 0.00005 or 5e-5" /> + + <param name="halfWindowSize" type="integer" value="20" + label="Half window size" + help="Number of data points, the resulting window reaches from + mass[currentIndex-halfWindowSize] to mass[currentIndex+halfWindowSize] + (window size is 2*halfWindowSize+1). + The best size differs depending on the selected smoothing method."/> + + <param name="peak_method" type="select" label="Noise estimation function"> + <option value="MAD" selected="True">MAD</option> + <option value="SuperSmoother">SuperSmoother</option> + </param> + + <param name="snr" type="integer" value="2" label="Signal-to-noise-ratio"/> + <param name="allow_nomatch" type="boolean" label="Allow no matches" help="Don't throw an error when less than 2 reference m/z were found in a spectrum" truevalue="TRUE" falsevalue="FALSE"/> + <param name="empty_nomatch" type="boolean" label="Empty no matches" help="If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero" truevalue="TRUE" falsevalue="FALSE"/> + <param name="remove_empty" type="boolean" label="Remove empty spectra" truevalue="TRUE" falsevalue="FALSE" help="For Cardinal RData files this step can only be performed if pixel annotations were provided"/> + + <conditional name="reference_for_alignment"> + <param name="align_ref" type="select" label="Reference" help="If given, samples will be aligned to reference, use internal calibrants to perform m/z calibration"> + <option value="no_reference" selected="True">no reference</option> + <option value="yes_reference">reference from tabular file</option> + </param> + <when value="no_reference"> + <param name="min_frequency" type="float" value="0.9" label = "minFrequency" help="Removal of all peaks which occur in less than minFrequency spectra to generate the reference m/z"/> + </when> + <when value="yes_reference"> + <param name="reference_file" type="data" format="tabular" + label="Reference m/z values" + help="Tabular file"/> + <param name="mz_column" data_ref="reference_file" label="Column with m/z values" type="data_column"/> + <param name="reference_header" type="boolean" label="File contains a header line" truevalue="TRUE" falsevalue="FALSE"/> + <param name="separate_alignment" type="boolean" label="Spectrum wise alignment" help="Internal binning is omitted to avoid interaction between spectra" truevalue="TRUE" falsevalue="FALSE"/> + </when> + </conditional> + </when> + <when value="skip_preprocessing"/> + </conditional> + </repeat> + <param name="export_processed" type="boolean" label="Export processed imzML" help="otherwise continuous imzML will be exported" truevalue="TRUE" falsevalue="FALSE"/> + </inputs> + <outputs> + <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}" /> + <data format="pdf" name="plots" from_work_dir="prepro_qc_plot.pdf" label="${tool.name} on ${on_string}: QC"/> + </outputs> + <tests> + <test> + <param name="infile" value="" ftype="imzml"> + <composite_data value="Example_Continuous.imzML"/> + <composite_data value="Example_Continuous.ibd"/> + </param> + <conditional name="restriction_conditional"> + <param name="restriction" value="restrict"/> + <param name="coordinates_file" value="restricted_pixels.tabular"/> + <param name="column_x" value="1"/> + <param name="column_y" value="2"/> + </conditional> + <conditional name="methods_conditional"> + <param name="method" value="Transformation"/> + <param name="transform_method" value="log2"/> + <param name="method" value="Smoothing"/> + <param name="smooth_method" value="SavitzkyGolay"/> + <param name="method" value="Basline"/> + <param name="baseline_method" value ="TopHat"/> + </conditional> + <output name="outfile_imzml" ftype="imzml" file="preprocessing1.imzml.txt" lines_diff="4"> + <extra_files type="file" file="outfile1.imzml" name="imzml" lines_diff="6"/> + <extra_files type="file" file="outfile1.ibd" name="ibd" compare="sim_size"/> + </output> + <output name="plots" file="Preprocessing1_QC.pdf" compare="sim_size"/> + </test> + <test> + <param name="infile" value="msidata_1.RData" ftype="rdata"/> + <conditional name="methods_conditional"> + <param name="method" value="Align"/> + <param name="warping_method" value="lowess"/> + <param name="halfWindowSize" value="5"/> + <param name="tolerance" value="0.001"/> + <param name="allow_nomatch" value="TRUE"/> + <param name="remove_empty" value="TRUE"/> + <param name="empty_nomatch" value="TRUE"/> + <conditional name="reference_for_alignment"> + <param name="align_ref" value="yes_reference"/> + <param name="reference_file" value="inputpeptides.tabular" ftype="tabular"/> + </conditional> + </conditional> + <output name="outfile_imzml" ftype="imzml" file="preprocessing2.imzml.txt" lines_diff="4"> + <extra_files type="file" file="outfile2.imzml" name="imzml" lines_diff="6"/> + <extra_files type="file" file="outfile2.ibd" name="ibd" compare="sim_size"/> + </output> + <output name="plots" file="Preprocessing2_QC.pdf" compare="sim_size"/> + </test> + <test> + <param name="infile" value="" ftype="imzml"> + <composite_data value="Example_Continuous.imzML"/> + <composite_data value="Example_Continuous.ibd"/> + </param> + <conditional name="methods_conditional"> + <param name="method" value="Calibrate"/> + <param name="calibrate_method" value="median"/> + </conditional> + <output name="outfile_imzml" ftype="imzml" file="preprocessing3.imzml.txt" lines_diff="4"> + <extra_files type="file" file="outfile3.imzml" name="imzml" lines_diff="6"/> + <extra_files type="file" file="outfile3.ibd" name="ibd" compare="sim_size"/> + </output> + <output name="plots" file="Preprocessing3_QC.pdf" compare="sim_size"/> + </test> + </tests> + <help><![CDATA[ + +@MADLI_QUANT_DESCRIPTION@ + +----- + +**Input data** + +- MSI data: 2 types of input data can be used: + + - imzml file (upload imzml and ibd file via the "composite" function) `Introduction to the imzml format <https://ms-imaging.org/wp/imzml/>`_ + - Cardinal "MSImageSet" data saved as .RData + +- Optional: Tabular file with pixel coordinates to restrict reading of imzML files to coordinates of interest. Tabular files with any header name or no header at all are supported. + + :: + + x_coord y_coord + 1 1 + 2 1 + 3 1 + ... + ... + +- Optional: Tabular file with reference m/z for the spectra align function. At least 2 m/z values of the input list must be present in every spectrum to peform the alignment. First column must contain m/z values, without empty fields or letters. Tabular files with any header name or no header at all are supported. + + :: + + + m/z + 100.0 + 100.01 + 100.02 + ... + ... + + + +**Options** + +- Transformation: Variance stabilization through intensity transformation:'log', 'log2', 'log10' and 'squareroot' (sqrt) are available +- Smoothing: Smoothing of the peaks reduces noise and improves peak detection. Available smoothing methods are 'SavitzkyGolay' and 'Moving Average' + + - For all smoothing methods: The larger the 'Half window size', the stronger the smoothing. The resulting window should be smaller than the FWHM (full width at half maximum) of the typical peaks. Moving average needs smaller window size than SavitzkyGolay. + - Moving average: Recommended for broader peaks/high m/z range spectra. Weighted moving average: Points in the center get larger weight factors than points away from the center. + - SavitzkyGolay: Recommended for sharp peaks/low m/z range, preserves the shape of the local maxima. The PolynomialOrder should be smaller than the resulting window. Negative values will be replaced with 0. + +- Baseline reduction: Baseline reduction removes background intensity generated by chemical noise (common in MALDI datasets). + + - Available methods are SNIP, TopHat,ConvexHull and median: + - SNIP is the default baseline reduction method in MALDIquant. + - ConvexHull is not appropriate for MALDI-TOF baseline removal. + - The moving median may generate negative intensities. + - Except for the ConvexHull all methods have a parameter for the 'Half window size' (in SNIP it is called 'iterations'). The smaller the window the more baseline will be removed but also parts of the peaks. Wider windows preserve the peak height better and produce a smoother baseline, but some local background variation will remain. + +- Intensity calibration (normalization): Normalization of intensities to Total Ion Current (TIC), median spectrum, Probabilistic Quotient Normalization (PQN) + + - TIC and median are local calibration methods: each spectrum is normalized on its own (each peak is divided by the TIC or median of the spectrum) + - PQN is a global calibration method: In PQN all spectra are calibrated using the TIC calibration first. Subsequently, a median reference spectrum is created and the intensities in all spectra are standardized using the reference spectrum and a spectrum-specific median is calculated for each spectrum. Finally, each spectrum is rescaled by the median of the ratios of its intensity values and that of the reference spectrum + +- Spectra alignment (warping): alignment for (re)calibration of m/z values. + + - peak detection is performed, the reference peaks will be matched to those detected peaks + - without external reference m/z: internal reference is obtained by filtering and binning the picked peaks to find landmark peaks and their average m/z + - with external reference m/z: the given m/z are used as a reference, at least 10 reference values are recommended + - non linear warping (parametric time warping plus binning) to match the reference peaks (internal or external) to the present (picked) peaks with the given tolerance. At least two m/z per spectrum are needed for the alignment. To prevent an error when this criterium is not fullfilled, "Don't throw an error when less than 2 reference m/z were found in a spectrum" should be set to yes. If the not aligned spectra should be set to zero select yes in "If TRUE the intensity values of MassSpectrum or MassPeaks objects with missing (NA) warping functions are set to zero". In order to remove such empty spectra set "Should empty spectra be removed" to yes. + + +**Output** + +- imzML file (imzML format can be continuous or processed) +- PDF with average mass spectra after each preprocessing step + +.. _MALDIquant: http://strimmerlab.org/software/maldiquant/ + + ]]> + </help> + <expand macro="citation"/> +</tool> +