Mercurial > repos > galaxyp > cardinal_preprocessing
diff preprocessing.xml @ 1:1b22c1e7bfe7 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d2f311f7fff24e54c565127c40414de708e31b3c
author | galaxyp |
---|---|
date | Thu, 25 Oct 2018 07:29:29 -0400 |
parents | 8c05a34f160a |
children | 1b875f0b8024 |
line wrap: on
line diff
--- a/preprocessing.xml Mon Oct 01 01:07:44 2018 -0400 +++ b/preprocessing.xml Thu Oct 25 07:29:29 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0"> +<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1"> <description> mass spectrometry imaging preprocessing </description> @@ -6,16 +6,26 @@ <import>macros.xml</import> </macros> <expand macro="requirements"> - <requirement type="package" version="2.2.1">r-gridextra</requirement> - <requirement type="package" version="0.20-35">r-lattice</requirement> - <requirement type="package" version="2.2.1">r-ggplot2</requirement> + <requirement type="package" version="2.3">r-gridextra</requirement> + <requirement type="package" version="3.0">r-ggplot2</requirement> + <requirement type="package" version="0.20_35">r-lattice</requirement> </expand> <command detect_errors="exit_code"> <![CDATA[ @INPUT_LINKING@ cat '${cardinal_preprocessing}' && - Rscript '${cardinal_preprocessing}' + Rscript '${cardinal_preprocessing}' && + + #if $imzml_output: + mkdir $outfile_imzml.files_path && + ls -l && + mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true && + mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true && + #end if + echo "imzML file:" > $outfile_imzml && + ls -l "$outfile_imzml.files_path" >> $outfile_imzml + ]]> </command> @@ -31,6 +41,11 @@ @READING_MSIDATA@ + +## remove duplicated coordinates, otherwise peak picking and log2 transformation will fail +print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) +msidata <- msidata[,!duplicated(coord(msidata))] + print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[])))) @@ -127,10 +142,6 @@ print('Peak_picking') ## Peakpicking - ## remove duplicated coordinates, otherwise peak picking will fail - print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed")) - msidata <- msidata[,!duplicated(coord(msidata))] - #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive': print('adaptive peakpicking') @@ -234,6 +245,12 @@ msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun) + ## optional: replace NA with 0 + #if $method.methods_conditional.methods_for_reduction.replace_NA_bin: + print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)[])))) + spectra(msidata)[][is.na(spectra(msidata)[])] = 0 + #end if + #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample': print('resample reduction') @@ -275,9 +292,17 @@ #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2': print('log2 transformation') - spectra(msidata)[][spectra(msidata)[] ==0] = NA - print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[])))) - spectra(msidata)[] = log2(spectra(msidata)[]) + ## replace 0 with NA + spectra_df = spectra(msidata)[] + spectra_df[spectra_df ==0] = NA + print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra_df)))) + spectra(msidata) = spectra_df + ## log transformation + spectra(msidata) = log2(spectra(msidata)) + ## optional: replace NA with 0 + #if $method.methods_conditional.transf_conditional.replace_NA_trans: + spectra(msidata)[][is.na(spectra(msidata)[])] = 0 + #end if #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt': print('squareroot transformation') @@ -300,7 +325,7 @@ #end if #end for - ############# Outputs: RData and QC report ############# + ############# Outputs: RData, imzml and QC report ############# ################################################################################ print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[])))) @@ -308,6 +333,13 @@ ## save as (.RData) save(msidata, file="$msidata_preprocessed") + ## save msidata as imzML file, will only work if there is at least 1 m/z left + #if $imzml_output: + if (nrow(msidata) > 0){ +print("write outputfile") + writeImzML(msidata, "out")} + #end if + ## save QC report pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12) @@ -455,6 +487,7 @@ <option value="mean" selected="True">mean</option> <option value="sum">sum</option> </param> + <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" help="Binning can introduce NAs, should they be replaced with 0"/> </when> <when value="resample"> <param name="resample_step" type="float" value="1" @@ -487,16 +520,22 @@ <option value="log2" selected="True">log2</option> <option value="sqrt">sqrt</option> </param> - <when value="log2"/> + <when value="log2"> + <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/> + </when> <when value="sqrt"/> </conditional> </when> </conditional> </repeat> + <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/> </inputs> <outputs> <data format="rdata" name="msidata_preprocessed" label="${tool.name} on ${on_string}"/> <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/> + <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML"> + <filter>imzml_output</filter> + </data> </outputs> <tests> <test> @@ -650,7 +689,7 @@ This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. @MSIDATA_INPUT_DESCRIPTION@ -- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking. +- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed after the data is read by the tool. @MZ_TABULAR_INPUT_DESCRIPTION@ **Options** @@ -671,7 +710,8 @@ **Output** -- imzML file, preprocessed +- MSI data as .RData output (can be read with the Cardinal package in R) +- optional: MSI data as imzML file - pdf with key values after each processing step ]]>