Mercurial > repos > galaxyp > msi_combine
changeset 1:f3f6c32ab690 draft
planemo upload for repository https://github.com/galaxyproteomics/tools-galaxyp/tree/master/tools/msi_combine commit dd64f41874a56c4e2619bf58ae3681d806cf9b3f
| author | galaxyp |
|---|---|
| date | Tue, 08 May 2018 02:36:26 -0400 |
| parents | 9cbcf48bf60a |
| children | 00b6c61f5054 |
| files | msi_combine.xml test-data/123_combined_QC.pdf test-data/12_auto_combined.RData test-data/12_auto_combined_QC.pdf test-data/12_auto_combined_matrix.tabular test-data/12_combined_QC.pdf |
| diffstat | 6 files changed, 8640 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/msi_combine.xml Tue Apr 24 13:22:48 2018 -0400 +++ b/msi_combine.xml Tue May 08 02:36:26 2018 -0400 @@ -1,20 +1,21 @@ -<tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.7.0.0"> +<tool id="mass_spectrometry_imaging_combine" name="MSI combine" version="1.10.0.0"> <description> combine several mass spectrometry imaging datasets into one </description> <requirements> - <requirement type="package" version="1.7.0">bioconductor-cardinal</requirement> + <requirement type="package" version="1.10.0">bioconductor-cardinal</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> </requirements> <command detect_errors="exit_code"> <![CDATA[ #for $i, $infile in enumerate($infiles): #if $infile.ext == 'imzml' - cp '${infile.extra_files_path}/imzml' infile_$i.imzML && - cp '${infile.extra_files_path}/ibd' infile_$i.ibd && + ln -s '${infile.extra_files_path}/imzml' infile.imzML && + ln -s '${infile.extra_files_path}/ibd' infile.ibd && #elif $infile.ext == 'analyze75' - cp '${infile.extra_files_path}/hdr' infile_$i.hdr && - cp '${infile.extra_files_path}/img' infile_$i.img && - cp '${infile.extra_files_path}/t2m' infile_$i.t2m && + ln -s '${infile.extra_files_path}/hdr' infile.hdr && + ln -s '${infile.extra_files_path}/img' infile.img && + ln -s '${infile.extra_files_path}/t2m' infile.t2m && #else ln -s '$infile' infile_${i}.RData && #end if @@ -26,24 +27,37 @@ </command> <configfiles> <configfile name="msi_combine"><![CDATA[ -library(Cardinal) +#import re +################ load libraries, read rename and combine files ################# -#if $coordinates_file: - input_list = read.delim("$coordinates_file", header = FALSE, +library(Cardinal) +library(ggplot2) + +#if str( $combine_conditional.combine_method ) == 'xy_shifts': + input_list = read.delim("$combine_conditional.coordinates_file", header = FALSE, stringsAsFactors = FALSE) #end if pixel_vector = numeric() +names_vector = character() +x_shifts = 0 +y_shifts = 0 +max_y = numeric() #set $msidata = [] +#set $pixelcoords = [] +#set $num_infiles = len($infiles) + #for $i, $infile in enumerate($infiles): #if $infile.ext == 'imzml' - msidata_$i <- readMSIData('infile_${i}.imzML') + msidata_$i <- readImzML('infile_${i}') sampleNames(msidata_$i) = "msidata" + pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i))) #elif $infile.ext == 'analyze75' - msidata_$i <- readMSIData('infile_${i}.hdr') + msidata_$i <- readAnalyze('infile_${i}') sampleNames(msidata_$i) = "msidata" + pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i))) #else loadRData <- function(fileName){ #loads an RData file, and returns it @@ -52,53 +66,190 @@ } msidata_$i = loadRData('infile_${i}.RData') sampleNames(msidata_$i) = "msidata" + pixelcoords_$i = cbind(coord(msidata_$i)[,1:2], rep($i+1,ncol(msidata_$i))) #end if + colnames(pixelcoords_$i)[3] = "file_number" - #if $coordinates_file: - coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1+$coordinates_header,$column_x] - coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1+$coordinates_header,$column_y] - pixelnumber = ncol(msidata_$i) - pixel_vector = append(pixel_vector, rep(input_list[$i+1+$coordinates_header,$column_names],times=pixelnumber)) + #if str( $combine_conditional.combine_method ) == 'xy_shifts': + coord(msidata_$i)\$x = coord(msidata_$i)\$x + input_list[$i+1,$combine_conditional.column_x] + coord(msidata_$i)\$y = coord(msidata_$i)\$y + input_list[$i+1,$combine_conditional.column_y] + pixel_vector = append(pixel_vector, rep(input_list[$i+1,$combine_conditional.column_names],times=ncol(msidata_$i))) + + #elif str( $combine_conditional.combine_method ) == 'automatic_combine': + #set escaped_element_identifier = re.sub('[^\w\-\s\[/]]', '_', str($infile.element_identifier)) + names_vector = append(names_vector, rep(paste($i+1, "$escaped_element_identifier", sep="_"),ncol(msidata_$i))) + coord(msidata_$i)\$x = coord(msidata_$i)\$x - (min(coord(msidata_$i)\$x-1)) + x_shifts + coord(msidata_$i)\$y = coord(msidata_$i)\$y - (min(coord(msidata_$i)\$y-1)) + y_shifts + x_shifts = max(coord(msidata_$i)\$x) + $combine_conditional.x_distance + max_y = append(max_y, max(coord(msidata_$i)\$y)) + + all_files = $num_infiles + new_row = ($i+1)/ceiling(sqrt(all_files)) + new_row%%1==0 + if (new_row%%1==0) + {x_shifts = 0 ### x values start again at zero + y_shifts = max(max_y) + $combine_conditional.y_distance + max_y = numeric()} + #end if #silent $msidata.append('msidata_'+str($i)) + #silent $pixelcoords.append('pixelcoords_'+str($i)) #end for -msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) +###################### automatic combination ################################### +################################################################################ -### count NAs and replace by 0 -spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0 -print(paste0("Number of NAs which were replaced ",sum(is.na(msidata_combined)))) +#if str( $combine_conditional.combine_method ) == 'automatic_combine': + print("automatic_combine") + msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) + sample_names = as.factor(names_vector) + pData(msidata_combined)\$sample = sample_names -#if $coordinates_file: -### rename pixels according to dataset -sample_names = as.factor(pixel_vector) -msidata_combined@pixelData@data\$sample = sample_names -#end if + ## create PDF to show pixels of each file + pdf("combining_qc.pdf", width=15, height=15) + position_df = cbind(coord(msidata_combined)[,1:2], pData(msidata_combined)\$sample) + colnames(position_df)[3] = "sample_name" + + combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=15))+ + theme(legend.position="bottom",legend.direction="vertical")+ + guides(fill=guide_legend(ncol=4,byrow=TRUE)) -### outputs ### + coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) + coord_labels\$file_number = gsub( "_.*$", "", coord_labels\$sample_name) + for(file_count in 1:nrow(coord_labels)) + { + combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,4])) + } + + print(combine_plot) + dev.off() + + ## save as (.RData) + msidata = msidata_combined + save(msidata, file="$msidata_combined") +################################## xy shifts ################################### +################################################################################ +#elif str( $combine_conditional.combine_method ) == 'xy_shifts': + print("xy_shifts") + msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) + +############# replace NA with 0 and rename pixels according to dataset ######### + + spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0 + print(paste0("Number of NAs which were replaced ",sum(is.na(spectra(msidata_combined))))) + + sample_names = as.factor(pixel_vector) + pData(msidata_combined)\$sample = sample_names -## save as (.RData) -msidata = msidata_combined -save(msidata, file="$msidata_combined") +###################################### outputs ################################# + ## save as (.RData) + msidata = msidata_combined + save(msidata, file="$msidata_combined") + + ## create PDF to show pixels of each file + pdf("combining_qc.pdf") + position_df = cbind(coord(msidata), pData(msidata)\$sample) + colnames(position_df)[3] = "sample_name" + + combine_plot = ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=12)) + + coord_labels = aggregate(cbind(x,y)~sample_name, data=position_df, mean) + for(file_count in 1:nrow(coord_labels)) + { + combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,"sample_name"])) + } + + print(combine_plot) + + dev.off() + -pdf("combining_qc.pdf", fonts = "Times", pointsize = 12) -image(msidata_combined, mz=1, colorkey=FALSE) -dev.off() +################################## no shifts ################################### +################################################################################ +#elif str( $combine_conditional.combine_method ) == 'no_shifts': + print("no_shifts") + msidata_combined = do.call(combine, list(#echo ','.join($msidata)#)) + +############# replace NA with 0 and rename pixels according to dataset ######### + + spectra(msidata_combined)[is.na(spectra(msidata_combined))] <- 0 + print(paste0("Number of NAs which were replaced ",sum(is.na(spectra(msidata_combined))))) + +###################################### outputs ################################# + ## save as (.RData) + msidata = msidata_combined + save(msidata, file="$msidata_combined") + + ## create PDF to show pixels of each file + pdf("combining_qc.pdf") + position_df = cbind(coord(msidata), pData(msidata)\$sample) + colnames(position_df)[3] = "sample_name" + + ggplot(position_df, aes(x=x, y=y, fill=sample_name))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation of combined data")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=12)) + + dev.off() + -### optional: intensity matrix ### +################################## no combination ############################## +################################################################################ +#elif str( $combine_conditional.combine_method ) == 'no_combine': + print("no_combine") + + ## create PDF to show pixels of each file + pdf("combining_qc.pdf") + position_df = do.call(rbind, list(#echo ','.join($pixelcoords)#)) + position_df[duplicated(position_df[,1:2]),3] = 0 + position_df\$file_number = as.factor(position_df\$file_number) + + combine_plot = ggplot(position_df, aes(x=x, y=y, fill=file_number))+ + geom_tile() + + coord_fixed()+ + ggtitle("Spatial orientation before combination")+ + theme_bw()+ + theme(text=element_text(family="ArialMT", face="bold", size=12))+ + theme(panel.grid.major = element_line(colour = "black")) + + scale_x_continuous(minor_breaks = seq(min(position_df\$x-50), max(position_df\$x+50, 1))) + + scale_y_continuous(minor_breaks = seq(min(position_df\$y-50), max(position_df\$y+50, 1))) + + + coord_labels = aggregate(cbind(x,y)~file_number, data=position_df, mean) + for(file_count in 1:nrow(coord_labels)) + { + combine_plot = combine_plot + annotate("text",x=coord_labels[file_count,"x"],y=coord_labels[file_count,"y"],label=toString(coord_labels[file_count,"file_number"])) + } + + print(combine_plot) + + dev.off() +#end if #if $output_matrix: -if (length(features(msidata_combined))> 0 & length(pixels(msidata_combined)) > 0) -{ - spectramatrix = spectra(msidata_combined) - rownames(spectramatrix) = mz(msidata_combined) - newmatrix = rbind(pixels(msidata_combined), spectramatrix) - write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") -}else{ - print("file has no features or pixels left") -} + if (length(features(msidata_combined))> 0 & length(pixels(msidata_combined)) > 0) + { + spectramatrix = spectra(msidata_combined) + rownames(spectramatrix) = mz(msidata_combined) + newmatrix = rbind(pixels(msidata_combined), spectramatrix) + write.table(newmatrix[2:nrow(newmatrix),], file="$matrixasoutput", quote = FALSE, row.names = TRUE, col.names=NA, sep = "\t") + }else{ + print("file has no features or pixels left") + } #end if ]]></configfile> @@ -107,13 +258,29 @@ <param name="infiles" type="data" multiple="true" format="imzml,rdata,analyze75" label="MSI rawdata as imzml, analyze7.5 or Cardinal MSImageSet saved as RData" help="load imzml and ibd file by uploading composite datatype imzml"/> - <param name="coordinates_file" type="data" optional="true" format="tabular" label="X and y values to shift data before combining" - help="tabular file with pixels of interest in two separate columns"/> - <param name="column_x" data_ref="coordinates_file" optional="true" label="Column with values for shift in x direction" type="data_column"/> - <param name="column_y" data_ref="coordinates_file" optional="true" label="Column with values for shift in y direction" type="data_column"/> - <param name="column_names" data_ref="coordinates_file" optional="true" label="Column with dataset names" type="data_column"/> - <param name="coordinates_header" label="Number of header lines to skip" value="0" type="integer"/> - <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> + <conditional name="combine_conditional"> + <param name="combine_method" type="select" label="Select the way you want to combine multiple files" help="More detailed help can be found in the help section at the bottom"> + <option value="automatic_combine" selected="True" >automatic combination</option> + <option value="no_shifts" >no coordinates shift</option> + <option value="xy_shifts">xy shifts by hand</option> + <option value="no_combine">check pixels before combination</option> + </param> + <when value="no_shifts"> + </when> + <when value="automatic_combine"> + <param name="x_distance" type="integer" value="10" label="How many pixels in x direction should be between files?"/> + <param name="y_distance" type="integer" value="10" label="How many pixels in y direction should be between files?"/> + </when> + <when value="xy_shifts"> + <param name="coordinates_file" type="data" format="tabular" label="datasetnames, X and y values to shift data before combining" + help="Tabular file with three columns: 1 for the filename, 1 for the x-coordinate shift and 1 for the y-coordinate shift"/> + <param name="column_x" data_ref="coordinates_file" label="Column with values for shift in x direction" type="data_column"/> + <param name="column_y" data_ref="coordinates_file" label="Column with values for shift in y direction" type="data_column"/> + <param name="column_names" data_ref="coordinates_file" label="Column with dataset names" type="data_column"/> + </when> + <when value="no_combine"/> + </conditional> + <param name="output_matrix" type="boolean" display="radio" label="Intensity matrix output"/> </inputs> <outputs> <data format="rdata" name="msidata_combined" label="Combined MSI data"/> @@ -125,6 +292,7 @@ <tests> <test expect_num_outputs="3"> <param name="infiles" value="msidata_1.RData,msidata_2.RData,msidata_3.RData" ftype="rdata"/> + <param name="combine_method" value="xy_shifts"/> <param name="coordinates_file" ftype="tabular" value="xy_coordinates.tabular"/> <param name="column_x" value="1"/> <param name="column_y" value="2"/> @@ -136,15 +304,36 @@ </test> <test expect_num_outputs="3"> <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/> + <param name="combine_method" value="no_shifts"/> <param name="output_matrix" value="True"/> <output name="matrixasoutput" file="12_combined_matrix.tabular"/> <output name="msidata_combined" file="12_combined.RData" compare="sim_size" /> <output name="combining_qc" file="12_combined_QC.pdf" compare="sim_size" delta="20000"/> </test> + <test expect_num_outputs="3"> + <param name="infiles" value="msidata_1.RData,msidata_2.RData" ftype="rdata"/> + <param name="combine_method" value="automatic_combine"/> + <param name="x_distance" value="1"/> + <param name="y_distance" value="1"/> + <param name="output_matrix" value="True"/> + <output name="matrixasoutput" file="12_auto_combined_matrix.tabular"/> + <output name="msidata_combined" file="12_auto_combined.RData" compare="sim_size" /> + <output name="combining_qc" file="12_auto_combined_QC.pdf" compare="sim_size" delta="20000"/> + </test> </tests> <help> <