comparison purityX.R @ 6:6b9a83e08467 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author computational-metabolomics
date Thu, 04 Mar 2021 12:25:25 +0000
parents a141be614e76
children b16952cc06d2
comparison
equal deleted inserted replaced
5:84b2151fd8f4 6:6b9a83e08467
1 library(msPurity) 1 library(msPurity)
2 library(optparse) 2 library(optparse)
3 print(sessionInfo()) 3 print(sessionInfo())
4 4
5 option_list <- list( 5 option_list <- list(
6 make_option(c("--xset_path"), type="character"), 6 make_option(c("--xset_path"), type = "character"),
7 make_option(c("-o", "--out_dir"), type="character"), 7 make_option(c("-o", "--out_dir"), type = "character"),
8 make_option(c("--mzML_path"), type="character"), 8 make_option(c("--mzML_path"), type = "character"),
9 make_option("--minOffset", default=0.5), 9 make_option("--minOffset", default = 0.5),
10 make_option("--maxOffset", default=0.5), 10 make_option("--maxOffset", default = 0.5),
11 make_option("--ilim", default=0.05), 11 make_option("--ilim", default = 0.05),
12 make_option("--iwNorm", default="none", type="character"), 12 make_option("--iwNorm", default = "none", type = "character"),
13 make_option("--exclude_isotopes", action="store_true"), 13 make_option("--exclude_isotopes", action = "store_true"),
14 make_option("--isotope_matrix", type="character"), 14 make_option("--isotope_matrix", type = "character"),
15 make_option("--purityType", default="purityFWHMmedian"), 15 make_option("--purityType", default = "purityFWHMmedian"),
16 make_option("--singleFile", default=0), 16 make_option("--singleFile", default = 0),
17 make_option("--cores", default=4), 17 make_option("--cores", default = 4),
18 make_option("--xgroups", type="character"), 18 make_option("--xgroups", type = "character"),
19 make_option("--rdata_name", default='xset'), 19 make_option("--rdata_name", default = "xset"),
20 make_option("--camera_xcms", default='xset'), 20 make_option("--camera_xcms", default = "xset"),
21 make_option("--files", type="character"), 21 make_option("--files", type = "character"),
22 make_option("--galaxy_files", type="character"), 22 make_option("--galaxy_files", type = "character"),
23 make_option("--choose_class", type="character"), 23 make_option("--choose_class", type = "character"),
24 make_option("--ignore_files", type="character"), 24 make_option("--ignore_files", type = "character"),
25 make_option("--rtraw_columns", action="store_true") 25 make_option("--rtraw_columns", action = "store_true")
26 ) 26 )
27 27
28 28
29 opt<- parse_args(OptionParser(option_list=option_list)) 29 opt <- parse_args(OptionParser(option_list = option_list))
30 print(opt) 30 print(opt)
31 31
32 32
33 if (!is.null(opt$xgroups)){ 33 if (!is.null(opt$xgroups)) {
34 xgroups = as.numeric(strsplit(opt$xgroups, ',')[[1]]) 34 xgroups <- as.numeric(strsplit(opt$xgroups, ",")[[1]])
35 }else{ 35 }else{
36 xgroups = NULL 36 xgroups <- NULL
37 } 37 }
38
39 38
40 39
41 print(xgroups) 40 print(xgroups)
42 41
43 if (!is.null(opt$remove_nas)){ 42 if (!is.null(opt$remove_nas)) {
44 df <- df[!is.na(df$mz),] 43 df <- df[!is.na(df$mz), ]
45 } 44 }
46 45
47 if (is.null(opt$isotope_matrix)){ 46 if (is.null(opt$isotope_matrix)) {
48 im <- NULL 47 im <- NULL
49 }else{ 48 }else{
50 im <- read.table(opt$isotope_matrix, 49 im <- read.table(opt$isotope_matrix,
51 header = TRUE, sep='\t', stringsAsFactors = FALSE) 50 header = TRUE, sep = "\t", stringsAsFactors = FALSE)
52 } 51 }
53 52
54 if (is.null(opt$exclude_isotopes)){ 53 if (is.null(opt$exclude_isotopes)) {
55 isotopes <- FALSE 54 isotopes <- FALSE
56 }else{ 55 }else{
57 isotopes <- TRUE 56 isotopes <- TRUE
58 } 57 }
59 58
60 if (is.null(opt$rtraw_columns)){ 59 if (is.null(opt$rtraw_columns)) {
61 rtraw_columns <- FALSE 60 rtraw_columns <- FALSE
62 }else{ 61 }else{
63 rtraw_columns <- TRUE 62 rtraw_columns <- TRUE
64 } 63 }
65 64
66 loadRData <- function(rdata_path, xset_name){ 65 loadRData <- function(rdata_path, xset_name) {
67 #loads an RData file, and returns the named xset object if it is there 66 #loads an RData file, and returns the named xset object if it is there
68 load(rdata_path) 67 load(rdata_path)
69 return(get(ls()[ls() == xset_name])) 68 return(get(ls()[ls() == xset_name]))
70 } 69 }
71 70
72 target_obj <- loadRData(opt$xset_path, opt$rdata_name) 71 target_obj <- loadRData(opt$xset_path, opt$rdata_name)
73 72
74 if (opt$camera_xcms=='camera'){ 73 if (opt$camera_xcms == "camera") {
75 xset <- target_obj@xcmsSet 74 xset <- target_obj@xcmsSet
76 }else{ 75 }else{
77 xset <- target_obj 76 xset <- target_obj
78 } 77 }
79 78
80 print(xset) 79 print(xset)
81 80
82 minOffset = as.numeric(opt$minOffset) 81 minOffset <- as.numeric(opt$minOffset)
83 maxOffset = as.numeric(opt$maxOffset) 82 maxOffset <- as.numeric(opt$maxOffset)
84 83
85 84 if (opt$iwNorm == "none") {
86 if (opt$iwNorm=='none'){ 85 iwNorm <- FALSE
87 iwNorm = FALSE 86 iwNormFun <- NULL
88 iwNormFun = NULL 87 }else if (opt$iwNorm == "gauss") {
89 }else if (opt$iwNorm=='gauss'){ 88 iwNorm <- TRUE
90 iwNorm = TRUE 89 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
91 iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) 90 }else if (opt$iwNorm == "rcosine") {
92 }else if (opt$iwNorm=='rcosine'){ 91 iwNorm <- TRUE
93 iwNorm = TRUE 92 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
94 iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) 93 }else if (opt$iwNorm == "QE5") {
95 }else if (opt$iwNorm=='QE5'){ 94 iwNorm <- TRUE
96 iwNorm = TRUE 95 iwNormFun <- msPurity::iwNormQE.5()
97 iwNormFun = msPurity::iwNormQE.5()
98 } 96 }
99 97
100 print(xset@filepaths) 98 print(xset@filepaths)
101 99
102 if (!is.null(opt$files)){ 100 if (!is.null(opt$files)) {
103 updated_filepaths <- trimws(strsplit(opt$files, ',')[[1]]) 101 updated_filepaths <- trimws(strsplit(opt$files, ",")[[1]])
104 updated_filepaths <- updated_filepaths[updated_filepaths != ""] 102 updated_filepaths <- updated_filepaths[updated_filepaths != ""]
105 print(updated_filepaths) 103 print(updated_filepaths)
106 updated_filenames = basename(updated_filepaths) 104 updated_filenames <- basename(updated_filepaths)
107 original_filenames = basename(xset@filepaths) 105 original_filenames <- basename(xset@filepaths)
108 update_idx = match(updated_filenames, original_filenames) 106 update_idx <- match(updated_filenames, original_filenames)
109 107
110 if (!is.null(opt$galaxy_files)){ 108 if (!is.null(opt$galaxy_files)) {
111 galaxy_files <- trimws(strsplit(opt$galaxy_files, ',')[[1]]) 109 galaxy_files <- trimws(strsplit(opt$galaxy_files, ",")[[1]])
112 galaxy_files <- galaxy_files[galaxy_files != ""] 110 galaxy_files <- galaxy_files[galaxy_files != ""]
113 xset@filepaths <- galaxy_files[update_idx] 111 xset@filepaths <- galaxy_files[update_idx]
114 }else{ 112 }else{
115 xset@filepaths <- updated_filepaths[update_idx] 113 xset@filepaths <- updated_filepaths[update_idx]
116 } 114 }
117 } 115 }
118 116
119 if (!is.null(opt$choose_class)){ 117 if (!is.null(opt$choose_class)) {
120 classes <- trimws(strsplit(opt$choose_class, ',')[[1]]) 118 classes <- trimws(strsplit(opt$choose_class, ",")[[1]])
121
122 119
123 ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes) 120 ignore_files_class <- which(!as.character(xset@phenoData$class) %in% classes)
124 121
125 print('choose class') 122 print("choose class")
126 print(ignore_files_class) 123 print(ignore_files_class)
127 }else{ 124 }else{
128 ignore_files_class <- NA 125 ignore_files_class <- NA
129 } 126 }
130 127
131 if (!is.null(opt$ignore_files)){ 128 if (!is.null(opt$ignore_files)) {
132 ignore_files_string <- trimws(strsplit(opt$ignore_files, ',')[[1]]) 129 ignore_files_string <- trimws(strsplit(opt$ignore_files, ",")[[1]])
133 filenames <- rownames(xset@phenoData) 130 filenames <- rownames(xset@phenoData)
134 ignore_files <- which(filenames %in% ignore_files_string) 131 ignore_files <- which(filenames %in% ignore_files_string)
135 132
136 ignore_files <- unique(c(ignore_files, ignore_files_class)) 133 ignore_files <- unique(c(ignore_files, ignore_files_class))
137 ignore_files <- ignore_files[ignore_files != ""] 134 ignore_files <- ignore_files[ignore_files != ""]
138 }else{ 135 }else{
139 if (anyNA(ignore_files_class)){ 136 if (anyNA(ignore_files_class)) {
140 ignore_files <- NULL 137 ignore_files <- NULL
141 }else{ 138 }else{
142 ignore_files <- ignore_files_class 139 ignore_files <- ignore_files_class
143 } 140 }
144 141
145 } 142 }
146 143
147 print('ignore_files') 144 print("ignore_files")
148 print(ignore_files) 145 print(ignore_files)
149 146
150 147
151 ppLCMS <- msPurity::purityX(xset=xset, 148 ppLCMS <- msPurity::purityX(xset = xset,
152 offsets=c(minOffset, maxOffset), 149 offsets = c(minOffset, maxOffset),
153 cores=opt$cores, 150 cores = opt$cores,
154 xgroups=xgroups, 151 xgroups = xgroups,
155 purityType=opt$purityType, 152 purityType = opt$purityType,
156 ilim = opt$ilim, 153 ilim = opt$ilim,
157 isotopes = isotopes, 154 isotopes = isotopes,
158 im = im, 155 im = im,
159 iwNorm = iwNorm, 156 iwNorm = iwNorm,
160 iwNormFun = iwNormFun, 157 iwNormFun = iwNormFun,
161 singleFile = opt$singleFile, 158 singleFile = opt$singleFile,
162 fileignore = ignore_files, 159 fileignore = ignore_files,
163 rtrawColumns=rtraw_columns) 160 rtrawColumns = rtraw_columns)
164
165 161
166 dfp <- ppLCMS@predictions 162 dfp <- ppLCMS@predictions
167 163
168 # to make compatable with deconrank 164 # to make compatable with deconrank
169 colnames(dfp)[colnames(dfp)=='grpid'] = 'peakID' 165 colnames(dfp)[colnames(dfp) == "grpid"] <- "peakID"
170 colnames(dfp)[colnames(dfp)=='median'] = 'medianPurity' 166 colnames(dfp)[colnames(dfp) == "median"] <- "medianPurity"
171 colnames(dfp)[colnames(dfp)=='mean'] = 'meanPurity' 167 colnames(dfp)[colnames(dfp) == "mean"] <- "meanPurity"
172 colnames(dfp)[colnames(dfp)=='sd'] = 'sdPurity' 168 colnames(dfp)[colnames(dfp) == "sd"] <- "sdPurity"
173 colnames(dfp)[colnames(dfp)=='stde'] = 'sdePurity' 169 colnames(dfp)[colnames(dfp) == "stde"] <- "sdePurity"
174 colnames(dfp)[colnames(dfp)=='RSD'] = 'cvPurity' 170 colnames(dfp)[colnames(dfp) == "RSD"] <- "cvPurity"
175 colnames(dfp)[colnames(dfp)=='pknm'] = 'pknmPurity' 171 colnames(dfp)[colnames(dfp) == "pknm"] <- "pknmPurity"
176 if(sum(is.na(dfp$medianPurity))>0){ 172
177 dfp[is.na(dfp$medianPurity),]$medianPurity = 0 173 if (sum(is.na(dfp$medianPurity)) > 0) {
174 dfp[is.na(dfp$medianPurity), ]$medianPurity <- 0
178 } 175 }
179 176
177 print(head(dfp))
178 write.table(dfp, file.path(opt$out_dir, "purityX_output.tsv"), row.names = FALSE, sep = "\t")
180 179
181 print(head(dfp)) 180 save.image(file.path(opt$out_dir, "purityX_output.RData"))
182 write.table(dfp, file.path(opt$out_dir, 'purityX_output.tsv'), row.names=FALSE, sep='\t')
183
184 save.image(file.path(opt$out_dir, 'purityX_output.RData'))