Mercurial > repos > galaxyp > cardinal_quality_report

--- a/macros.xml	Mon Oct 01 01:07:13 2018 -0400
+++ b/macros.xml	Thu Oct 25 07:28:42 2018 -0400
@@ -4,10 +4,17 @@
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@VERSION@">bioconductor-cardinal</requirement>
+            <requirement type="package" version="3.5.1">r-base</requirement>
             <yield/>
         </requirements>
     </xml>

+    <xml name="print_version">
+        <version_command><![CDATA[
+echo $(R --version | grep version | grep -v GNU)", Cardinal version" $(R --vanilla --slave -e "library(Cardinal); cat(sessionInfo()\$otherPkgs\$Cardinal\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+        ]]></version_command>
+    </xml>
+
     <token name="@INPUT_LINKING@"><![CDATA[
         #if $infile.ext == 'imzml'
             ln -s '${infile.extra_files_path}/imzml' infile.imzML &&
@@ -33,14 +40,14 @@

         #if $infile.ext == 'imzml'
             #if str($processed_cond.processed_file) == "processed":
-                msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units")
+                msidata <- readImzML('infile', mass.accuracy=$processed_cond.accuracy, units.accuracy = "$processed_cond.units", attach.only=TRUE)
                 centroided(msidata) = $centroids
             #else
-                msidata <- readImzML('infile')
+                msidata <- readImzML('infile', attach.only=TRUE)
                 centroided(msidata) = $centroids
             #end if
         #elif $infile.ext == 'analyze75'
-            msidata = readAnalyze('infile')
+            msidata = readAnalyze('infile', attach.only=TRUE)
             centroided(msidata) = $centroids
         #else
             msidata = loadRData('infile.RData')
@@ -177,7 +184,8 @@
         <param name="filename" type="text" value="" label="Title" help="Will appear in the pdf output, if nothing given it will take the dataset name">
             <sanitizer invalid_char="">
                 <valid initial="string.ascii_letters,string.digits">
-                    <add value="_" />
+                    <add value="_"/>
+                    <add value=" "/>
                 </valid>
             </sanitizer>
         </param>
@@ -198,12 +206,12 @@
         <param name="feature_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
     </xml>

-    <xml name="reading_2_column_mz_tabular">
-        <param name="calibrant_file" type="data" optional="true" format="tabular"
+    <xml name="reading_2_column_mz_tabular" token_optional="false">
+        <param name="calibrant_file" type="data" optional="@OPTIONAL@" format="tabular"
             label="m/z of interest (e.g. internal Calibrants)" help="one column with m/z values, optional second column with names (m/z values can also be selected as name)"/>
-        <param name="mz_column" data_ref="calibrant_file" label="Column with m/z values" type="data_column"/>
-        <param name="name_column" data_ref="calibrant_file" label="Column with name of m/z values" type="data_column"/>
-        <param name="calibrant_header" type="boolean" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
+        <param name="mz_column" data_ref="calibrant_file" optional="@OPTIONAL@" label="Column with m/z values" type="data_column"/>
+        <param name="name_column" data_ref="calibrant_file" optional="@OPTIONAL@" label="Column with name of m/z values" type="data_column"/>
+        <param name="calibrant_header" type="boolean" optional="@OPTIONAL@" label="Tabular file contains a header line" truevalue="TRUE" falsevalue="FALSE"/>
     </xml>

     <xml name="reading_pixel_annotations">
--- a/quality_report.xml	Mon Oct 01 01:07:13 2018 -0400
+++ b/quality_report.xml	Thu Oct 25 07:28:42 2018 -0400
@@ -1,4 +1,4 @@
-<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.0">
+<tool id="cardinal_quality_report" name="MSI Qualitycontrol" version="@VERSION@.1">
     <description>
         mass spectrometry imaging QC
     </description>
@@ -6,12 +6,12 @@
         <import>macros.xml</import>
     </macros>
     <expand macro="requirements">
-        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
         <requirement type="package" version="1.1_2">r-rcolorbrewer</requirement>
-        <requirement type="package" version="2.2.1">r-gridextra</requirement>
+        <requirement type="package" version="2.3">r-gridextra</requirement>
+        <requirement type="package" version="3.0">r-ggplot2</requirement>
         <requirement type="package" version="2.23_15">r-kernsmooth</requirement>
-        <requirement type="package" version="0.5.0">r-scales</requirement>
-        <requirement type="package" version="1.0.8"> r-pheatmap</requirement>
+        <requirement type="package" version="1.0.0">r-scales</requirement>
+        <requirement type="package" version="1.0.10"> r-pheatmap</requirement>
     </expand>
     <command detect_errors="exit_code">
     <![CDATA[
@@ -33,6 +33,7 @@
 library(scales)
 library(pheatmap)

+
 @READING_MSIDATA@

 ## remove duplicated coordinates
@@ -362,7 +363,7 @@
     peaksperpixel = colSums(spectra(msidata)[]> 0, na.rm=TRUE)
     peakscoordarray=cbind(coord(msidata)[,1:2], peaksperpixel)

-    print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel), colour=colo)+
+    print(ggplot(peakscoordarray, aes(x=x, y=y, fill=peaksperpixel))+
      geom_tile() + coord_fixed() +
      ggtitle("Number of peaks per spectrum")+
      theme_bw() +
@@ -375,9 +376,8 @@
     ############################### 6) TIC image ###############################

     TICcoordarray=cbind(coord(msidata)[,1:2], TICs)
-    colo = colorRampPalette(
-    c("blue", "cyan", "green", "yellow","red"))
-    print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs), colour=colo)+
+
+    print(ggplot(TICcoordarray, aes(x=x, y=y, fill=TICs))+
      geom_tile() + coord_fixed() +
      ggtitle("Total Ion Chromatogram")+
      theme_bw() +
@@ -386,6 +386,20 @@
      scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
                             ,space = "Lab", na.value = "black", name = "TIC"))

+    ############################### 6b) median int image ###############################
+
+    median_int = apply(spectra(msidata)[],2,median)
+    median_coordarray=cbind(coord(msidata)[,1:2], median_int)
+    print(ggplot(median_coordarray, aes(x=x, y=y, fill=median_int))+
+     geom_tile() + coord_fixed() +
+     ggtitle("Median intensity per pixel")+
+     theme_bw() +
+     theme(plot.title = element_text(hjust = 0.5))+
+     theme(text=element_text(family="ArialMT", face="bold", size=12))+
+     scale_fill_gradientn(colours = c("blue", "purple" , "red","orange")
+                            ,space = "Lab", na.value = "black", name = "median\nintensity"))
+
+

     ############################### 7) Most abundant m/z image #################

@@ -407,6 +421,7 @@

     #if $do_pca:

+        set.seed(1)
         pca = PCA(msidata, ncomp=2)
         par(mfrow = c(2,1))
         plot(pca, col=c("black", "darkgrey"), main="PCA for two components")
@@ -499,7 +514,6 @@
     ########################## 12) Number of peaks per m/z #####################

     peakspermz = rowSums(spectra(msidata)[] > 0, na.rm=TRUE)
-print(median(peakspermz/pixelcount*100))

     par(mfrow = c(2,1), mar=c(5,6,4,4.5))
     ## 12a) scatterplot
@@ -600,7 +614,7 @@

         heatmap.parameters <- list(corr_matrix,
         show_rownames = T, show_colnames = T,
-        main = "Pearson correlation on mean intensities for each annotation group")
+        main = "Pearson correlation on mean intensities")
         do.call("pheatmap", heatmap.parameters)
     }

@@ -639,8 +653,8 @@
         for (mass in 1:length(inputcalibrantmasses)){

             ### define the plot window with xmin und xmax
-            minmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-1)
-            maxmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+3)
+            minmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]-0.5)
+            maxmasspixel = features(msidata_no_NA, mz=inputcalibrantmasses[mass]+1.5)

             ### find m/z with the highest mean intensity in m/z range (red line in plot 16) and calculate ppm difference for plot 17
             filtered_data = msidata_no_NA[mz(msidata_no_NA) >= inputcalibrantmasses[mass]-plusminusvalues[mass] & mz(msidata_no_NA) <= inputcalibrantmasses[mass]+plusminusvalues[mass],]
@@ -662,19 +676,22 @@
             ppmdifference2 = mzdifference2/inputcalibrantmasses[mass]*1000000
             differencevector2[mass] = round(ppmdifference2, digits=2)

+            ## plotting of 4 spectra in one page
             par(mfrow = c(2, 2), oma=c(0,0,2,0))
+            ## average plot
             plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main= "Average spectrum")
             abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
             abline(v=c(maxvalue), col="red", lty=2)
             abline(v=c(mzvalue), col="green2", lty=4)
-            plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel1, main=paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel1,1:2])))
-            abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
-            abline(v=c(maxvalue), col="red", lty=2)
-            abline(v=c(mzvalue), col="green2", lty=4)
+            ## average plot including points per data point
+            plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = 1:length(pixelnumber), main="Average spectrum with data points")
+            points(mz(msidata_no_NA[minmasspixel:maxmasspixel,]), rowMeans(spectra(msidata_no_NA)[minmasspixel:maxmasspixel,]), col="blue", pch=20)
+            ## plot of a random pixel (1)
             plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel2, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel2,1:2])))
             abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
             abline(v=c(maxvalue), col="red", lty=2)
             abline(v=c(mzvalue), col="green2", lty=4)
+            ## plot of a random pixel (2)
             plot(msidata_no_NA[minmasspixel:maxmasspixel,], pixel = pixel3, main= paste0("Spectrum at ", rownames(coord(msidata_no_NA)[pixel3,1:2])))
             abline(v=c(inputcalibrantmasses[mass] -plusminusvalues[count], inputcalibrantmasses[mass] ,inputcalibrantmasses[mass] +plusminusvalues[count]), col="blue", lty=c(3,5,3))
             abline(v=c(maxvalue), col="red", lty=2)
@@ -773,12 +790,12 @@
         }else{

             ### plot ppm differences over pixels (spectra index)
-            par(mar=c(4.1, 4.1, 4.1, 7.5))
+            par(mar=c(4.1, 4.1, 4.1, 8.5))

             plot(0,0,type="n", ylim=c(min(ppm_df, na.rm=TRUE),max(ppm_df, na.rm=TRUE)), xlim = c(1,ncol(filtered_data)),xlab = "Spectra index", ylab = "m/z difference in ppm", main="Difference m/z with max. average intensity vs. theor. m/z\n(per spectrum)")
             for (each_cal in 1:ncol(ppm_df)){
                 lines(ppm_df[,each_cal], col=mycolours[each_cal], type="p")}
-            legend("topright", inset=c(-0.25,0), xpd = TRUE, bty="n", legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)
+            legend("topright", inset=c(-0.2,0), xpd = TRUE, bty="n", cex=0.8,legend=inputcalibrantmasses, col=mycolours[1:ncol(ppm_df)],lty=1)
              if (!is.null(levels(msidata\$annotation))){
                 abline(v=abline_vector, lty = 3)}}

@@ -804,7 +821,7 @@
                 <when value="no_annotation"/>
         </conditional>
         <expand macro="pdf_filename"/>
-        <expand macro="reading_2_column_mz_tabular"/>
+        <expand macro="reading_2_column_mz_tabular" optional="true"/>
         <param name="plusminus_ppm" value="200" type="float" label="ppm range" help="Will be added in both directions to input calibrant m/z"/>
         <param name="do_pca" type="boolean" label="PCA with 2 components"/>
         <repeat name="calibrantratio" title="Plot fold change of two m/z" min="0" max="10">
@@ -855,9 +872,6 @@
             <conditional name="tabular_annotation">
                 <param name="load_annotation" value="no_annotation"/>
             </conditional>
-            <param name="calibrant_file" value="inputcalibrantfile2.txt"/>
-            <param name="mz_column" value="1"/>
-            <param name="name_column" value="2"/>
             <param name="filename" value="Testfile_analyze75"/>
             <param name="do_pca" value="True"/>
             <output name="QC_report" file="QC_analyze75.pdf" compare="sim_size"/>
@@ -946,6 +960,7 @@
 - (cal) Intensity heatmaps for the m/z value that is closest to the calibrant m/z. The intensities are averaged within the calibrant m/z window (ppm range).
 - Number of peaks per spectrum: For each spectrum the number of m/z values with intensity > 0 is calculated and plotted as heatmap.
 - Total ion chromatogram: For each spectrum all intensities are summed up to obtain the TIC which is plotted as heatmap.
+- Median intensity: For each spectrum the median intensity is plotted as heatmap.
 - Most abundant m/z in each spectrum: For each spectrum the m/z value with the highest intensity is plotted.
 - PCA for two components: Result of a principal component analysis (PCA) for two components is given. The loading plot depicts the contribution of each m/z value and the x-y image represents the differences between the pixels.

@@ -973,7 +988,7 @@
 **Mass spectra and m/z accuracy**

 - Mass spectra over the full m/z range: First plot shows the average intensities over all spectra. The other three mass spectra are from random individual pixels (spectra).
-- (cal) For each calibrant four zoomed in mass spectrum are drawn: The first shows the average intensities over all spectra and the other three are single mass spectra. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window.
+- (cal) For each calibrant four zoomed-in mass spectra are drawn: The first two mass spectra show the average intensities over all spectra and the other two spectra are from random individual pixels. The theoretical calibrant m/z (taken from the input file) is represented by the dashed blue line. The dotted blue lines show the given ppm range. The green line is the m/z value that is closest to the theoretical calibrant and the red line is the m/z with the highest average intensity in the m/z window. In the second average spectrum plot each blue point indicates one data point.
 - (annot) Average spectrum per annotation group: For each calibrant a zoomed in mass spectrum is plotted this time with the average intensities for each annotation group separately.
 - (cal) Difference m/z with max. average intensity vs. theor. calibrant m/z: The difference in ppm between the m/z with the highest average intensity and the theoretical m/z are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the red line in the zoomed in mass spectra.
 - (cal) Difference closest measured m/z vs. theor. calibrant m/z: The difference in ppm between the closest m/z value and the theoretical m/z values are plotted for each calibrant. This corresponds to the difference between the dashed blue line and the green line in the zoomed in mass spectra.
Binary file test-data/Heatmaps_LM8_file16.pdf has changed
Binary file test-data/Heatmaps_analyze75.pdf has changed
Binary file test-data/Heatmaps_imzml.pdf has changed
Binary file test-data/Heatmaps_rdata.pdf has changed
Binary file test-data/Plot_analyze75.pdf has changed
Binary file test-data/Plot_analyze75_allpixels.pdf has changed
Binary file test-data/Plot_empty_spectra.pdf has changed
Binary file test-data/Plot_imzml.pdf has changed
Binary file test-data/Plot_rdata.pdf has changed
Binary file test-data/QC_analyze75.pdf has changed
Binary file test-data/QC_empty_spectra.pdf has changed
Binary file test-data/QC_imzml.pdf has changed
Binary file test-data/QC_rdata.pdf has changed
--- a/test-data/analyze75.svg	Mon Oct 01 01:07:13 2018 -0400
+++ b/test-data/analyze75.svg	Thu Oct 25 07:28:42 2018 -0400
@@ -1,15 +1,15 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="216pt" height="216pt" viewBox="0 0 216 216" version="1.1">
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="504pt" height="504pt" viewBox="0 0 504 504" version="1.1">
 <g id="surface1">
-<rect x="0" y="0" width="216" height="216" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,80%);fill-opacity:1;" d="M 0 0 L 72 0 L 72 72 L 0 72 Z M 0 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,32.156863%);fill-opacity:1;" d="M 0 72 L 72 72 L 72 144 L 0 144 Z M 0 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,3.921569%,0%);fill-opacity:1;" d="M 0 144 L 72 144 L 72 216 L 0 216 Z M 0 144 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 72 0 L 144 0 L 144 72 L 72 72 Z M 72 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.392157%,33.333333%);fill-opacity:1;" d="M 72 72 L 144 72 L 144 144 L 72 144 Z M 72 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,93.72549%);fill-opacity:1;" d="M 72 144 L 144 144 L 144 216 L 72 216 Z M 72 144 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.784314%,54.509804%);fill-opacity:1;" d="M 144 0 L 216 0 L 216 72 L 144 72 Z M 144 0 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,81.568627%);fill-opacity:1;" d="M 144 72 L 216 72 L 216 144 L 144 144 Z M 144 72 "/>
-<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.960784%,100%);fill-opacity:1;" d="M 144 144 L 216 144 L 216 216 L 144 216 Z M 144 144 "/>
+<rect x="0" y="0" width="504" height="504" style="fill:rgb(100%,100%,100%);fill-opacity:1;stroke:none;"/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,80%);fill-opacity:1;" d="M 0 504 L 168 504 L 168 336 L 0 336 Z M 0 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,100%,32.156863%);fill-opacity:1;" d="M 0 336 L 168 336 L 168 168 L 0 168 Z M 0 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(100%,3.921569%,0%);fill-opacity:1;" d="M 0 168 L 168 168 L 168 0 L 0 0 Z M 0 168 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0%,0%);fill-opacity:1;" d="M 168 504 L 336 504 L 336 336 L 168 336 Z M 168 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.392157%,33.333333%);fill-opacity:1;" d="M 168 336 L 336 336 L 336 168 L 168 168 Z M 168 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,93.72549%);fill-opacity:1;" d="M 168 168 L 336 168 L 336 0 L 168 0 Z M 168 168 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,0.784314%,54.509804%);fill-opacity:1;" d="M 336 504 L 504 504 L 504 336 L 336 336 Z M 336 504 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.568627%,81.568627%);fill-opacity:1;" d="M 336 336 L 504 336 L 504 168 L 336 168 Z M 336 336 "/>
+<path style=" stroke:none;fill-rule:nonzero;fill:rgb(0%,1.960784%,100%);fill-opacity:1;" d="M 336 168 L 504 168 L 504 0 L 336 0 Z M 336 168 "/>
 </g>
 </svg>
Binary file test-data/analyze75_filtered2.pdf has changed
Binary file test-data/analyze_filteredoutside.RData has changed
Binary file test-data/centroids_rdata.pdf has changed
Binary file test-data/cluster_skm.RData has changed
Binary file test-data/imzml_filtered2.pdf has changed
Binary file test-data/imzml_filtered3.RData has changed
Binary file test-data/imzml_filtered3.pdf has changed
Binary file test-data/imzml_filtered4.RData has changed
Binary file test-data/imzml_filtered4.pdf has changed
Binary file test-data/imzml_filtered5.RData has changed
Binary file test-data/imzml_filtered5.pdf has changed
Binary file test-data/kmeans_analyze.pdf has changed
Binary file test-data/pca_imzml.pdf has changed
Binary file test-data/rdata_notfiltered.RData has changed
Binary file test-data/rdata_notfiltered.pdf has changed
Binary file test-data/test1.pdf has changed
Binary file test-data/test2.pdf has changed
Binary file test-data/test2.rdata has changed
Binary file test-data/test3.pdf has changed
Binary file test-data/test4.pdf has changed
Binary file test-data/test4.rdata has changed
Binary file test-data/test5.pdf has changed
Binary file test-data/test6.pdf has changed
Binary file test-data/test6.rdata has changed
Binary file test-data/test7.pdf has changed
Binary file test-data/test7.rdata has changed