Mercurial > repos > computational-metabolomics > mspurity_puritya
comparison purityX.R @ 6:aca2eb389ccd draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
| author | computational-metabolomics | 
|---|---|
| date | Thu, 04 Mar 2021 12:26:24 +0000 | 
| parents | 56cce1a90b73 | 
| children | 33d733850e85 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 5:0d73912c7cdc | 6:aca2eb389ccd | 
|---|---|
| 1 library(msPurity) | 1 library(msPurity) | 
| 2 library(optparse) | 2 library(optparse) | 
| 3 print(sessionInfo()) | 3 print(sessionInfo()) | 
| 4 | 4 | 
| 5 option_list <- list( | 5 option_list <- list( | 
| 6 make_option(c("--xset_path"), type="character"), | 6 make_option(c("--xset_path"), type = "character"), | 
| 7 make_option(c("-o", "--out_dir"), type="character"), | 7 make_option(c("-o", "--out_dir"), type = "character"), | 
| 8 make_option(c("--mzML_path"), type="character"), | 8 make_option(c("--mzML_path"), type = "character"), | 
| 9 make_option("--minOffset", default=0.5), | 9 make_option("--minOffset", default = 0.5), | 
| 10 make_option("--maxOffset", default=0.5), | 10 make_option("--maxOffset", default = 0.5), | 
| 11 make_option("--ilim", default=0.05), | 11 make_option("--ilim", default = 0.05), | 
| 12 make_option("--iwNorm", default="none", type="character"), | 12 make_option("--iwNorm", default = "none", type = "character"), | 
| 13 make_option("--exclude_isotopes", action="store_true"), | 13 make_option("--exclude_isotopes", action = "store_true"), | 
| 14 make_option("--isotope_matrix", type="character"), | 14 make_option("--isotope_matrix", type = "character"), | 
| 15 make_option("--purityType", default="purityFWHMmedian"), | 15 make_option("--purityType", default = "purityFWHMmedian"), | 
| 16 make_option("--singleFile", default=0), | 16 make_option("--singleFile", default = 0), | 
| 17 make_option("--cores", default=4), | 17 make_option("--cores", default = 4), | 
| 18 make_option("--xgroups", type="character"), | 18 make_option("--xgroups", type = "character"), | 
| 19 make_option("--rdata_name", default='xset'), | 19 make_option("--rdata_name", default = "xset"), | 
| 20 make_option("--camera_xcms", default='xset'), | 20 make_option("--camera_xcms", default = "xset"), | 
| 21 make_option("--files", type="character"), | 21 make_option("--files", type = "character"), | 
| 22 make_option("--galaxy_files", type="character"), | 22 make_option("--galaxy_files", type = "character"), | 
| 23 make_option("--choose_class", type="character"), | 23 make_option("--choose_class", type = "character"), | 
| 24 make_option("--ignore_files", type="character"), | 24 make_option("--ignore_files", type = "character"), | 
| 25 make_option("--rtraw_columns", action="store_true") | 25 make_option("--rtraw_columns", action = "store_true") | 
| 26 ) | 26 ) | 
| 27 | 27 | 
| 28 | 28 | 
| 29 opt<- parse_args(OptionParser(option_list=option_list)) | 29 opt <- parse_args(OptionParser(option_list = option_list)) | 
| 30 print(opt) | 30 print(opt) | 
| 31 | 31 | 
| 32 | 32 | 
| 33 if (!is.null(opt$xgroups)){ | 33 if (!is.null(opt$xgroups)) { | 
| 34 xgroups = as.numeric(strsplit(opt$xgroups, ',')[[1]]) | 34 xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]]) | 
| 35 }else{ | 35 }else{ | 
| 36 xgroups = NULL | 36 xgroups <- NULL | 
| 37 } | 37 } | 
| 38 | |
| 39 | 38 | 
| 40 | 39 | 
| 41 print(xgroups) | 40 print(xgroups) | 
| 42 | 41 | 
| 43 if (!is.null(opt$remove_nas)){ | 42 if (!is.null(opt$remove_nas)) { | 
| 44 df <- df[!is.na(df$mz),] | 43 df <- df[!is.na(df$mz), ] | 
| 45 } | 44 } | 
| 46 | 45 | 
| 47 if (is.null(opt$isotope_matrix)){ | 46 if (is.null(opt$isotope_matrix)) { | 
| 48 im <- NULL | 47 im <- NULL | 
| 49 }else{ | 48 }else{ | 
| 50 im <- read.table(opt$isotope_matrix, | 49 im <- read.table(opt$isotope_matrix, | 
| 51 header = TRUE, sep='\t', stringsAsFactors = FALSE) | 50 header = TRUE, sep = "\t", stringsAsFactors = FALSE) | 
| 52 } | 51 } | 
| 53 | 52 | 
| 54 if (is.null(opt$exclude_isotopes)){ | 53 if (is.null(opt$exclude_isotopes)) { | 
| 55 isotopes <- FALSE | 54 isotopes <- FALSE | 
| 56 }else{ | 55 }else{ | 
| 57 isotopes <- TRUE | 56 isotopes <- TRUE | 
| 58 } | 57 } | 
| 59 | 58 | 
| 60 if (is.null(opt$rtraw_columns)){ | 59 if (is.null(opt$rtraw_columns)) { | 
| 61 rtraw_columns <- FALSE | 60 rtraw_columns <- FALSE | 
| 62 }else{ | 61 }else{ | 
| 63 rtraw_columns <- TRUE | 62 rtraw_columns <- TRUE | 
| 64 } | 63 } | 
| 65 | 64 | 
| 66 loadRData <- function(rdata_path, xset_name){ | 65 loadRData <- function(rdata_path, xset_name) { | 
| 67 #loads an RData file, and returns the named xset object if it is there | 66 #loads an RData file, and returns the named xset object if it is there | 
| 68 load(rdata_path) | 67 load(rdata_path) | 
| 69 return(get(ls()[ls() == xset_name])) | 68 return(get(ls()[ls() == xset_name])) | 
| 70 } | 69 } | 
| 71 | 70 | 
| 72 target_obj <- loadRData(opt$xset_path, opt$rdata_name) | 71 target_obj <- loadRData(opt$xset_path, opt$rdata_name) | 
| 73 | 72 | 
| 74 if (opt$camera_xcms=='camera'){ | 73 if (opt$camera_xcms == "camera") { | 
| 75 xset <- target_obj@xcmsSet | 74 xset <- target_obj@xcmsSet | 
| 76 }else{ | 75 }else{ | 
| 77 xset <- target_obj | 76 xset <- target_obj | 
| 78 } | 77 } | 
| 79 | 78 | 
| 80 print(xset) | 79 print(xset) | 
| 81 | 80 | 
| 82 minOffset = as.numeric(opt$minOffset) | 81 minOffset <- as.numeric(opt$minOffset) | 
| 83 maxOffset = as.numeric(opt$maxOffset) | 82 maxOffset <- as.numeric(opt$maxOffset) | 
| 84 | 83 | 
| 85 | 84 if (opt$iwNorm == "none") { | 
| 86 if (opt$iwNorm=='none'){ | 85 iwNorm <- FALSE | 
| 87 iwNorm = FALSE | 86 iwNormFun <- NULL | 
| 88 iwNormFun = NULL | 87 }else if (opt$iwNorm == "gauss") { | 
| 89 }else if (opt$iwNorm=='gauss'){ | 88 iwNorm <- TRUE | 
| 90 iwNorm = TRUE | 89 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) | 
| 91 iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) | 90 }else if (opt$iwNorm == "rcosine") { | 
| 92 }else if (opt$iwNorm=='rcosine'){ | 91 iwNorm <- TRUE | 
| 93 iwNorm = TRUE | 92 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) | 
| 94 iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) | 93 }else if (opt$iwNorm == "QE5") { | 
| 95 }else if (opt$iwNorm=='QE5'){ | 94 iwNorm <- TRUE | 
| 96 iwNorm = TRUE | 95 iwNormFun <- msPurity::iwNormQE.5() | 
| 97 iwNormFun = msPurity::iwNormQE.5() | |
| 98 } | 96 } | 
| 99 | 97 | 
| 100 print(xset@filepaths) | 98 print(xset@filepaths) | 
| 101 | 99 | 
| 102 if (!is.null(opt$files)){ | 100 if (!is.null(opt$files)) { | 
| 103 updated_filepaths <- trimws(strsplit(opt$files, ',')[[1]]) | 101 updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]]) | 
| 104 updated_filepaths <- updated_filepaths[updated_filepaths != ""] | 102 updated_filepaths <- updated_filepaths[updated_filepaths != ""] | 
| 105 print(updated_filepaths) | 103 print(updated_filepaths) | 
| 106 updated_filenames = basename(updated_filepaths) | 104 updated_filenames <- basename(updated_filepaths) | 
| 107 original_filenames = basename(xset@filepaths) | 105 original_filenames <- basename(xset@filepaths) | 
| 108 update_idx = match(updated_filenames, original_filenames) | 106 update_idx <- match(updated_filenames, original_filenames) | 
| 109 | 107 | 
| 110 if (!is.null(opt$galaxy_files)){ | 108 if (!is.null(opt$galaxy_files)) { | 
| 111 galaxy_files <- trimws(strsplit(opt$galaxy_files, ',')[[1]]) | 109 galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]]) | 
| 112 galaxy_files <- galaxy_files[galaxy_files != ""] | 110 galaxy_files <- galaxy_files[galaxy_files != ""] | 
| 113 xset@filepaths <- galaxy_files[update_idx] | 111 xset@filepaths <- galaxy_files[update_idx] | 
| 114 }else{ | 112 }else{ | 
| 115 xset@filepaths <- updated_filepaths[update_idx] | 113 xset@filepaths <- updated_filepaths[update_idx] | 
| 116 } | 114 } | 
| 117 } | 115 } | 
| 118 | 116 | 
| 119 if (!is.null(opt$choose_class)){ | 117 if (!is.null(opt$choose_class)) { | 
| 120 classes <- trimws(strsplit(opt$choose_class, ',')[[1]]) | 118 classes <- trimws(strsplit(opt$choose_class, ",")[[1]]) | 
| 121 | |
| 122 | 119 | 
| 123 ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes) | 120 ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes) | 
| 124 | 121 | 
| 125 print('choose class') | 122 print("choose class") | 
| 126 print(ignore_files_class) | 123 print(ignore_files_class) | 
| 127 }else{ | 124 }else{ | 
| 128 ignore_files_class <- NA | 125 ignore_files_class <- NA | 
| 129 } | 126 } | 
| 130 | 127 | 
| 131 if (!is.null(opt$ignore_files)){ | 128 if (!is.null(opt$ignore_files)) { | 
| 132 ignore_files_string <- trimws(strsplit(opt$ignore_files, ',')[[1]]) | 129 ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]]) | 
| 133 filenames <- rownames(xset@phenoData) | 130 filenames <- rownames(xset@phenoData) | 
| 134 ignore_files <- which(filenames %in% ignore_files_string) | 131 ignore_files <- which(filenames %in% ignore_files_string) | 
| 135 | 132 | 
| 136 ignore_files <- unique(c(ignore_files, ignore_files_class)) | 133 ignore_files <- unique(c(ignore_files, ignore_files_class)) | 
| 137 ignore_files <- ignore_files[ignore_files != ""] | 134 ignore_files <- ignore_files[ignore_files != ""] | 
| 138 }else{ | 135 }else{ | 
| 139 if (anyNA(ignore_files_class)){ | 136 if (anyNA(ignore_files_class)) { | 
| 140 ignore_files <- NULL | 137 ignore_files <- NULL | 
| 141 }else{ | 138 }else{ | 
| 142 ignore_files <- ignore_files_class | 139 ignore_files <- ignore_files_class | 
| 143 } | 140 } | 
| 144 | 141 | 
| 145 } | 142 } | 
| 146 | 143 | 
| 147 print('ignore_files') | 144 print("ignore_files") | 
| 148 print(ignore_files) | 145 print(ignore_files) | 
| 149 | 146 | 
| 150 | 147 | 
| 151 ppLCMS <- msPurity::purityX(xset=xset, | 148 ppLCMS <- msPurity::purityX(xset = xset, | 
| 152 offsets=c(minOffset, maxOffset), | 149 offsets = c(minOffset, maxOffset), | 
| 153 cores=opt$cores, | 150 cores = opt$cores, | 
| 154 xgroups=xgroups, | 151 xgroups = xgroups, | 
| 155 purityType=opt$purityType, | 152 purityType = opt$purityType, | 
| 156 ilim = opt$ilim, | 153 ilim = opt$ilim, | 
| 157 isotopes = isotopes, | 154 isotopes = isotopes, | 
| 158 im = im, | 155 im = im, | 
| 159 iwNorm = iwNorm, | 156 iwNorm = iwNorm, | 
| 160 iwNormFun = iwNormFun, | 157 iwNormFun = iwNormFun, | 
| 161 singleFile = opt$singleFile, | 158 singleFile = opt$singleFile, | 
| 162 fileignore = ignore_files, | 159 fileignore = ignore_files, | 
| 163 rtrawColumns=rtraw_columns) | 160 rtrawColumns = rtraw_columns) | 
| 164 | |
| 165 | 161 | 
| 166 dfp <- ppLCMS@predictions | 162 dfp <- ppLCMS@predictions | 
| 167 | 163 | 
| 168 # to make compatable with deconrank | 164 # to make compatable with deconrank | 
| 169 colnames(dfp)[colnames(dfp)=='grpid'] = 'peakID' | 165 colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID" | 
| 170 colnames(dfp)[colnames(dfp)=='median'] = 'medianPurity' | 166 colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity" | 
| 171 colnames(dfp)[colnames(dfp)=='mean'] = 'meanPurity' | 167 colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity" | 
| 172 colnames(dfp)[colnames(dfp)=='sd'] = 'sdPurity' | 168 colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity" | 
| 173 colnames(dfp)[colnames(dfp)=='stde'] = 'sdePurity' | 169 colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity" | 
| 174 colnames(dfp)[colnames(dfp)=='RSD'] = 'cvPurity' | 170 colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity" | 
| 175 colnames(dfp)[colnames(dfp)=='pknm'] = 'pknmPurity' | 171 colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity" | 
| 176 if(sum(is.na(dfp$medianPurity))>0){ | 172 | 
| 177 dfp[is.na(dfp$medianPurity),]$medianPurity = 0 | 173 if (sum(is.na(dfp$medianPurity)) > 0) { | 
| 174 dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0 | |
| 178 } | 175 } | 
| 179 | 176 | 
| 177 print(head(dfp)) | |
| 178 write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t") | |
| 180 | 179 | 
| 181 print(head(dfp)) | 180 save.image(file.path(opt$out_dir, "purityX_output.RData")) | 
| 182 write.table(dfp, file.path(opt$out_dir, 'purityX_output.tsv'), row.names=FALSE, sep='\t') | |
| 183 | |
| 184 save.image(file.path(opt$out_dir, 'purityX_output.RData')) | 
