changeset 1:1b22c1e7bfe7 draft

planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/cardinal commit d2f311f7fff24e54c565127c40414de708e31b3c
author galaxyp
date Thu, 25 Oct 2018 07:29:29 -0400
parents 8c05a34f160a
children 1b875f0b8024
files macros.xml preprocessing.xml test-data/Heatmaps_LM8_file16.pdf test-data/Heatmaps_analyze75.pdf test-data/Heatmaps_imzml.pdf test-data/Heatmaps_rdata.pdf test-data/Plot_analyze75.pdf test-data/Plot_analyze75_allpixels.pdf test-data/Plot_empty_spectra.pdf test-data/Plot_imzml.pdf test-data/Plot_rdata.pdf test-data/QC_analyze75.pdf test-data/QC_empty_spectra.pdf test-data/QC_imzml.pdf test-data/QC_rdata.pdf test-data/analyze75.svg test-data/analyze75_filtered2.pdf test-data/analyze_filteredoutside.RData test-data/centroids_rdata.pdf test-data/cluster_skm.RData test-data/imzml_filtered2.RData test-data/imzml_filtered2.pdf test-data/imzml_filtered3.RData test-data/imzml_filtered3.pdf test-data/imzml_filtered4.RData test-data/imzml_filtered4.pdf test-data/imzml_filtered5.RData test-data/imzml_filtered5.pdf test-data/kmeans_analyze.pdf test-data/pca_imzml.pdf test-data/rdata_notfiltered.RData test-data/rdata_notfiltered.pdf test-data/test1.pdf test-data/test2.pdf test-data/test2.rdata test-data/test3.pdf test-data/test4.pdf test-data/test4.rdata test-data/test5.pdf test-data/test6.pdf test-data/test6.rdata test-data/test7.pdf test-data/test7.rdata
diffstat 42 files changed, 84 insertions(+), 36 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Mon Oct 01 01:07:44 2018 -0400
+++ b/macros.xml	Thu Oct 25 07:29:29 2018 -0400
@@ -4,10 +4,17 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">bioconductor-cardinal</requirement>
+            <requirement type="package" version="3.5.1">r-base</requirement>
             <yield/>
         </requirements>
     </xml>
 
+    <xml name="print_version">
+        <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", Cardinal version" $(R --vanilla --slave -e "library(Cardinal); cat(sessionInfo()\$otherPkgs\$Cardinal\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        ]]></version_command>
+    </xml>
+
     <token name="@INPUT_LINKING@"><![CDATA[
         #if $infile.ext == 'imzml'
             ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
@@ -33,14 +40,14 @@
 
         #if $infile.ext == 'imzml'
             #if str($processed_cond.processed_file) == "processed":
-                msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
+                msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units", attach.only=TRUE)
                 centroided(msidata) = $centroids
             #else
-                msidata <- readImzML('infile')
+                msidata <- readImzML('infile', attach.only=TRUE)
                 centroided(msidata) = $centroids
             #end if
         #elif $infile.ext == 'analyze75'
-            msidata = readAnalyze('infile')
+            msidata = readAnalyze('infile', attach.only=TRUE)
             centroided(msidata) = $centroids
         #else
             msidata = loadRData('infile.RData')
@@ -177,7 +184,8 @@
         <param name="filename" type="text" value="" label="Title" help="Will appear in the pdf output, if nothing given it will take the dataset name">
             <sanitizer invalid_char="">
                 <valid initial="string.ascii_letters,string.digits">
-                    <add value="_" />
+                    <add value="_"/>
+                    <add value=" "/>
                 </valid>
             </sanitizer>
         </param>
@@ -198,12 +206,12 @@
         <param name="feature_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
     </xml>
 
-    <xml name="reading_2_column_mz_tabular">
-        <param name="calibrant_file" type="data" optional="true" format="tabular"
+    <xml name="reading_2_column_mz_tabular" token_optional="false">
+        <param name="calibrant_file" type="data" optional="@OPTIONAL@" format="tabular"
             label="m/z of interest (e.g. internal Calibrants)" help="one column with m/z values, optional second column with names (m/z values can also be selected as name)"/>
-        <param name="mz_column" data_ref="calibrant_file" label="Column with m/z values" type="data_column"/>
-        <param name="name_column" data_ref="calibrant_file" label="Column with name of m/z values" type="data_column"/>
-        <param name="calibrant_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
+        <param name="mz_column" data_ref="calibrant_file" optional="@OPTIONAL@" label="Column with m/z values" type="data_column"/>
+        <param name="name_column" data_ref="calibrant_file" optional="@OPTIONAL@" label="Column with name of m/z values" type="data_column"/>
+        <param name="calibrant_header" type="boolean" optional="@OPTIONAL@" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
     </xml>
 
     <xml name="reading_pixel_annotations">
--- a/preprocessing.xml	Mon Oct 01 01:07:44 2018 -0400
+++ b/preprocessing.xml	Thu Oct 25 07:29:29 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.0">
+<tool id="cardinal_preprocessing" name="MSI preprocessing" version="@VERSION@.1">
     <description>
         mass spectrometry imaging preprocessing
     </description>
@@ -6,16 +6,26 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="2.2.1">r-gridextra</requirement>
-        <requirement type="package" version="0.20-35">r-lattice</requirement>
-        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="2.3">r-gridextra</requirement>
+        <requirement type="package" version="3.0">r-ggplot2</requirement>
+        <requirement type="package" version="0.20_35">r-lattice</requirement>
     </expand>
     <command detect_errors="exit_code">
     <![CDATA[
 
         @INPUT_LINKING@
         cat '${cardinal_preprocessing}' &&
-        Rscript '${cardinal_preprocessing}'
+        Rscript '${cardinal_preprocessing}'
+        ## Export the imzML pair only when requested: the outfile_imzml output is filtered out otherwise, so nothing outside this block may reference it.
+        ## The mv calls are piped into true so the chain continues when R wrote no out.imzML/out.ibd (writing is skipped if no m/z feature is left).
+        #if $imzml_output:
+            && mkdir '$outfile_imzml.files_path'
+            && ls -l
+            && mv ./out.imzML "${os.path.join($outfile_imzml.files_path, 'imzml')}" | true
+            && mv ./out.ibd "${os.path.join($outfile_imzml.files_path, 'ibd')}" | true
+            && echo "imzML file:" > '$outfile_imzml'
+            && ls -l "$outfile_imzml.files_path" >> '$outfile_imzml'
+        #end if
 
     ]]>
     </command>
@@ -31,6 +41,11 @@
 
 @READING_MSIDATA@
 
+
+## Drop pixels whose coordinates repeat an earlier pixel (the first occurrence is kept); done right after reading because peak picking and log2 transformation fail on duplicated coordinates
+print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
+msidata <- msidata[,!duplicated(coord(msidata))]
+
 print(paste0("Number of NA in input file: ",sum(is.na(spectra(msidata)[]))))
 
 
@@ -127,10 +142,6 @@
             print('Peak_picking')
             ## Peakpicking
 
-            ## remove duplicated coordinates, otherwise peak picking will fail
-            print(paste0(sum(duplicated(coord(msidata))), " duplicated coordinates were removed"))
-            msidata <- msidata[,!duplicated(coord(msidata))]
-
             #if str( $method.methods_conditional.methods_for_picking.picking_method) == 'adaptive':
                 print('adaptive peakpicking')
 
@@ -234,6 +245,12 @@
 
                 msidata = reduceDimension(msidata, method="bin", width=$method.methods_conditional.methods_for_reduction.bin_width, units="$method.methods_conditional.methods_for_reduction.bin_units", fun=$method.methods_conditional.methods_for_reduction.bin_fun)
 
+                ## optional (replace_NA_bin): report how many NA are present after binning, then overwrite them with 0
+                #if $method.methods_conditional.methods_for_reduction.replace_NA_bin:
+                    print(paste0("Number of NA that were set to zero after binning:",sum(is.na(spectra(msidata)[]))))
+                    spectra(msidata)[][is.na(spectra(msidata)[])] = 0
+                #end if
+
             #elif str( $method.methods_conditional.methods_for_reduction.reduction_method) == 'resample':
                 print('resample reduction')
 
@@ -275,9 +292,17 @@
             #if str( $method.methods_conditional.transf_conditional.trans_type) == 'log2':
                 print('log2 transformation')
 
-                spectra(msidata)[][spectra(msidata)[] ==0] = NA
-                print(paste0("Number of 0 which were converted into NA:",sum(is.na(spectra(msidata)[]))))
-                spectra(msidata)[] = log2(spectra(msidata)[])
+                ## replace 0 with NA (log2(0) is -Inf); zeros are counted before replacing so NA that were already present are not reported as converted
+                spectra_df = spectra(msidata)[]
+                print(paste0("Number of 0 which were converted into NA:",sum(spectra_df == 0, na.rm=TRUE)))
+                spectra_df[spectra_df ==0] = NA
+                spectra(msidata) = spectra_df
+                ## log transformation
+                spectra(msidata) = log2(spectra(msidata))
+                ## optional (replace_NA_trans): set every NA, including the former zeros, back to 0 after the transformation
+                #if $method.methods_conditional.transf_conditional.replace_NA_trans:
+                    spectra(msidata)[][is.na(spectra(msidata)[])] = 0
+                #end if
 
             #elif str( $method.methods_conditional.transf_conditional.trans_type) == 'sqrt':
                 print('squareroot transformation')
@@ -300,7 +325,7 @@
             #end if
     #end for
 
-    ############# Outputs: RData and QC report #############
+    ############# Outputs: RData, imzml and QC report #############
     ################################################################################
 
     print(paste0("Number of NA in output file: ",sum(is.na(spectra(msidata)[]))))
@@ -308,6 +333,13 @@
     ## save as (.RData)
     save(msidata, file="$msidata_preprocessed")
 
+    ## write the processed data as imzML with base name "out"; skipped when no m/z feature is left, because writing only works with at least 1 m/z
+    #if $imzml_output:
+        if (nrow(msidata) > 0){
+            print("write outputfile")
+            writeImzML(msidata, "out")}
+    #end if
+
     ## save QC report
 
     pdf("Preprocessing.pdf", fonts = "Times", pointsize = 12)
@@ -455,6 +487,7 @@
                                     <option value="mean" selected="True">mean</option>
                                     <option value="sum">sum</option>
                             </param>
+                            <param name="replace_NA_bin" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" help="Binning can introduce NAs, should they be replaced with 0"/>
                         </when>
                         <when value="resample">
                             <param name="resample_step" type="float" value="1"
@@ -487,16 +520,22 @@
                             <option value="log2" selected="True">log2</option>
                             <option value="sqrt">sqrt</option>
                         </param>
-                            <when value="log2"/>
+                            <when value="log2">
+                                <param name="replace_NA_trans" type="boolean" label="Replace NA with 0" truevalue="TRUE" falsevalue="FALSE" help="0 values are set to NA before log2 transformation, after transformation they can be set back to 0"/>
+                            </when>
                             <when value="sqrt"/>
                     </conditional>
                 </when>
             </conditional>
         </repeat>
+        <param name="imzml_output" type="boolean" label="Output of imzML file" truevalue="TRUE" falsevalue="FALSE"/>
     </inputs>
     <outputs>
         <data format="rdata" name="msidata_preprocessed" label="${tool.name} on ${on_string}"/>
         <data format="pdf" name="QC_overview" from_work_dir="Preprocessing.pdf" label = "${tool.name} on ${on_string}: QC"/>
+        <data format="imzml" name="outfile_imzml" label="${tool.name} on ${on_string}: imzML">
+            <filter>imzml_output</filter>
+       </data>
     </outputs>
     <tests>
         <test>
@@ -650,7 +689,7 @@
 This tool provides multiple Cardinal functions to preprocess mass spectrometry imaging data. 
 
 @MSIDATA_INPUT_DESCRIPTION@
-- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed before peak picking.
+- Coordinates stored as decimals rather than integers will be rounded to obtain a regular pixel grid. This might lead to duplicated coordinates which will be automatically removed after the data is read by the tool.
 @MZ_TABULAR_INPUT_DESCRIPTION@
 
 **Options**
@@ -671,7 +710,8 @@
 
 **Output**
 
-- imzML file, preprocessed
+- MSI data as .RData output (can be read with the Cardinal package in R)
+- optional: MSI data as imzML file
 - pdf with key values after each processing step
 
         ]]>
Binary file test-data/Heatmaps_LM8_file16.pdf has changed
Binary file test-data/Heatmaps_analyze75.pdf has changed
Binary file test-data/Heatmaps_imzml.pdf has changed
Binary file test-data/Heatmaps_rdata.pdf has changed
Binary file test-data/Plot_analyze75.pdf has changed
Binary file test-data/Plot_analyze75_allpixels.pdf has changed
Binary file test-data/Plot_empty_spectra.pdf has changed
Binary file test-data/Plot_imzml.pdf has changed
Binary file test-data/Plot_rdata.pdf has changed
Binary file test-data/QC_analyze75.pdf has changed
Binary file test-data/QC_empty_spectra.pdf has changed
Binary file test-data/QC_imzml.pdf has changed
Binary file test-data/QC_rdata.pdf has changed
--- a/test-data/analyze75.svg	Mon Oct 01 01:07:44 2018 -0400
+++ b/test-data/analyze75.svg	Thu Oct 25 07:29:29 2018 -0400
@@ -1,15 +1,15 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="216pt" height="216pt" viewBox="0 0 216 216" version="1.1">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="504pt" height="504pt" viewBox="0 0 504 504" version="1.1">
 <g id="surface1">
-<rect x="0" y="0" width="216" height="216" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,80%);fill-opacity:1;" d="M 0 0 L 72 0 L 72 72 L 0 72 Z M 0 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,32.156863%);fill-opacity:1;" d="M 0 72 L 72 72 L 72 144 L 0 144 Z M 0 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,3.921569%,0%);fill-opacity:1;" d="M 0 144 L 72 144 L 72 216 L 0 216 Z M 0 144 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 72 0 L 144 0 L 144 72 L 72 72 Z M 72 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.392157%,33.333333%);fill-opacity:1;" d="M 72 72 L 144 72 L 144 144 L 72 144 Z M 72 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,93.72549%);fill-opacity:1;" d="M 72 144 L 144 144 L 144 216 L 72 216 Z M 72 144 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.784314%,54.509804%);fill-opacity:1;" d="M 144 0 L 216 0 L 216 72 L 144 72 Z M 144 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,81.568627%);fill-opacity:1;" d="M 144 72 L 216 72 L 216 144 L 144 144 Z M 144 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.960784%,100%);fill-opacity:1;" d="M 144 144 L 216 144 L 216 216 L 144 216 Z M 144 144 "/>
+<rect x="0" y="0" width="504" height="504" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,80%);fill-opacity:1;" d="M 0 504 L 168 504 L 168 336 L 0 336 Z M 0 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,32.156863%);fill-opacity:1;" d="M 0 336 L 168 336 L 168 168 L 0 168 Z M 0 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,3.921569%,0%);fill-opacity:1;" d="M 0 168 L 168 168 L 168 0 L 0 0 Z M 0 168 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 168 504 L 336 504 L 336 336 L 168 336 Z M 168 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.392157%,33.333333%);fill-opacity:1;" d="M 168 336 L 336 336 L 336 168 L 168 168 Z M 168 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,93.72549%);fill-opacity:1;" d="M 168 168 L 336 168 L 336 0 L 168 0 Z M 168 168 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.784314%,54.509804%);fill-opacity:1;" d="M 336 504 L 504 504 L 504 336 L 336 336 Z M 336 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,81.568627%);fill-opacity:1;" d="M 336 336 L 504 336 L 504 168 L 336 168 Z M 336 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.960784%,100%);fill-opacity:1;" d="M 336 168 L 504 168 L 504 0 L 336 0 Z M 336 168 "/>
 </g>
 </svg>
Binary file test-data/analyze75_filtered2.pdf has changed
Binary file test-data/analyze_filteredoutside.RData has changed
Binary file test-data/centroids_rdata.pdf has changed
Binary file test-data/cluster_skm.RData has changed
Binary file test-data/imzml_filtered2.pdf has changed
Binary file test-data/imzml_filtered3.RData has changed
Binary file test-data/imzml_filtered3.pdf has changed
Binary file test-data/imzml_filtered4.RData has changed
Binary file test-data/imzml_filtered4.pdf has changed
Binary file test-data/imzml_filtered5.RData has changed
Binary file test-data/imzml_filtered5.pdf has changed
Binary file test-data/kmeans_analyze.pdf has changed
Binary file test-data/pca_imzml.pdf has changed
Binary file test-data/rdata_notfiltered.RData has changed
Binary file test-data/rdata_notfiltered.pdf has changed
Binary file test-data/test1.pdf has changed
Binary file test-data/test2.pdf has changed
Binary file test-data/test2.rdata has changed
Binary file test-data/test3.pdf has changed
Binary file test-data/test4.pdf has changed
Binary file test-data/test4.rdata has changed
Binary file test-data/test5.pdf has changed
Binary file test-data/test6.pdf has changed
Binary file test-data/test6.rdata has changed
Binary file test-data/test7.pdf has changed
Binary file test-data/test7.rdata has changed