comparison flagRemove.R @ 8:77706396e7bd draft

planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
author computational-metabolomics
date Wed, 12 Jun 2024 16:03:14 +0000
parents fecfe8c80e25
children c33b92eeb1fb
comparison
equal deleted inserted replaced
7:20f4fdaf36bb 8:77706396e7bd
1 library(msPurity) 1 library(msPurity)
2 library(optparse) 2 library(optparse)
3 print(sessionInfo()) 3 print(sessionInfo())
4 option_list <- list( 4 option_list <- list(
5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(), 5 make_option(c("-o", "--out_dir"),
6 help = "Output folder for resulting files [default = %default]" 6 type = "character", default = getwd(),
7 ), 7 help = "Output folder for resulting files [default = %default]"
8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), 8 ),
9 help = "The path to the xcmsSet object [default = %default]" 9 make_option(c("-x", "--xset_path"),
10 ), 10 type = "character", default = file.path(getwd(), "xset.rds"),
11 make_option("--polarity", default = NA, 11 help = "The path to the xcmsSet object [default = %default]"
12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" 12 ),
13 ), 13 make_option("--polarity",
14 make_option("--rsd_i_blank", default = 100, 14 default = NA,
15 help = "RSD threshold for the blank [default = %default]" 15 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]"
16 ), 16 ),
17 make_option("--minfrac_blank", default = 0.5, 17 make_option("--rsd_i_blank",
18 help = "minimum fraction of files for features needed for the blank [default = %default]" 18 default = 100,
19 ), 19 help = "RSD threshold for the blank [default = %default]"
20 make_option("--rsd_rt_blank", default = 100, 20 ),
21 help = "RSD threshold for the RT of the blank [default = %default]" 21 make_option("--minfrac_blank",
22 ), 22 default = 0.5,
23 23 help = "minimum fraction of files for features needed for the blank [default = %default]"
24 make_option("--ithres_blank", default = 0, 24 ),
25 help = "Intensity threshold for the blank [default = %default]" 25 make_option("--rsd_rt_blank",
26 ), 26 default = 100,
27 make_option("--s2b", default = 10, 27 help = "RSD threshold for the RT of the blank [default = %default]"
28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. 28 ),
29 make_option("--ithres_blank",
30 default = 0,
31 help = "Intensity threshold for the blank [default = %default]"
32 ),
33 make_option("--s2b",
34 default = 10,
35 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g.
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. 36 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10.
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak 37 1000/10 = 100, so sample has fold change higher than the threshold and the peak
31 is not considered a blank [default = %default]" 38 is not considered a blank [default = %default]"
32 ), 39 ),
33 make_option("--blank_class", default = "blank", type = "character", 40 make_option("--blank_class",
34 help = "A string representing the class that will be used for the blank.[default = %default]" 41 default = "blank", type = "character",
35 ), 42 help = "A string representing the class that will be used for the blank.[default = %default]"
36 make_option("--egauss_thr", default = NA, 43 ),
37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works 44 make_option("--egauss_thr",
45 default = NA,
46 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works
38 if the 'verbose columns' and 'fit gauss' was used with xcms 47 if the 'verbose columns' and 'fit gauss' was used with xcms
39 [default = %default]" 48 [default = %default]"
40 ), 49 ),
41 make_option("--rsd_i_sample", default = 100, 50 make_option("--rsd_i_sample",
42 help = "RSD threshold for the samples [default = %default]" 51 default = 100,
43 ), 52 help = "RSD threshold for the samples [default = %default]"
44 make_option("--minfrac_sample", default = 0.8, 53 ),
45 help = "minimum fraction of files for features needed for the samples [default = %default]" 54 make_option("--minfrac_sample",
46 ), 55 default = 0.8,
47 make_option("--rsd_rt_sample", default = 100, 56 help = "minimum fraction of files for features needed for the samples [default = %default]"
48 help = "RSD threshold for the RT of the samples [default = %default]" 57 ),
49 ), 58 make_option("--rsd_rt_sample",
50 make_option("--ithres_sample", default = 5000, 59 default = 100,
51 help = "Intensity threshold for the sample [default = %default]" 60 help = "RSD threshold for the RT of the samples [default = %default]"
52 ), 61 ),
53 make_option("--grp_rm_ids", default = NA, 62 make_option("--ithres_sample",
54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) 63 default = 5000,
64 help = "Intensity threshold for the sample [default = %default]"
65 ),
66 make_option("--grp_rm_ids",
67 default = NA,
68 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output)
55 [default = %default]" 69 [default = %default]"
56 ), 70 ),
57 make_option("--remove_spectra", action = "store_true", 71 make_option("--remove_spectra",
58 help = "TRUE if flagged spectra is to be removed [default = %default]" 72 action = "store_true",
59 ), 73 help = "TRUE if flagged spectra is to be removed [default = %default]"
60 make_option("--minfrac_xcms", default = 0.5, 74 ),
61 help = "minfrac for xcms grouping [default = %default]" 75 make_option("--minfrac_xcms",
62 ), 76 default = 0.5,
63 make_option("--mzwid", default = 0.001, 77 help = "minfrac for xcms grouping [default = %default]"
64 help = "mzwid for xcms grouping [default = %default]" 78 ),
65 ), 79 make_option("--mzwid",
66 make_option("--bw", default = 5, 80 default = 0.001,
67 help = "bw for xcms grouping [default = %default]" 81 help = "mzwid for xcms grouping [default = %default]"
68 ), 82 ),
69 83 make_option("--bw",
70 make_option("--temp_save", action = "store_true", 84 default = 5,
71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" 85 help = "bw for xcms grouping [default = %default]"
72 ), 86 ),
73 87 make_option("--temp_save",
74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") 88 action = "store_true",
75 89 help = "Assign True if files for each step saved (for testing purposes) [default = %default]"
90 ),
91 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class")
76 ) 92 )
77 93
78 # nolint start 94 # nolint start
79 # make_option("--multilist", action="store_true" 95 # make_option("--multilist", action="store_true"
80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" 96 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided"
86 102
87 opt <- replace(opt, opt == "NA", NA) 103 opt <- replace(opt, opt == "NA", NA)
88 104
89 if (is.null(opt$temp_save)) { 105 if (is.null(opt$temp_save)) {
90 temp_save <- FALSE 106 temp_save <- FALSE
91 }else{ 107 } else {
92 temp_save <- TRUE 108 temp_save <- TRUE
93 } 109 }
94 110
95 if (is.null(opt$remove_spectra)) { 111 if (is.null(opt$remove_spectra)) {
96 remove_spectra <- FALSE 112 remove_spectra <- FALSE
97 }else{ 113 } else {
98 remove_spectra <- TRUE 114 remove_spectra <- TRUE
99 } 115 }
100 116
101 117
102 print(opt) 118 print(opt)
103 119
104 getxcmsSetObject <- function(xobject) { 120 getxcmsSetObject <- function(xobject) {
105 # XCMS 1.x 121 # XCMS 1.x
106 if (class(xobject) == "xcmsSet") 122 if (class(xobject) == "xcmsSet") {
107 return(xobject) 123 return(xobject)
124 }
108 # XCMS 3.x 125 # XCMS 3.x
109 if (class(xobject) == "XCMSnExp") { 126 if (class(xobject) == "XCMSnExp") {
110 # Get the legacy xcmsSet object 127 # Get the legacy xcmsSet object
111 suppressWarnings(xset <- as(xobject, "xcmsSet")) 128 suppressWarnings(xset <- as(xobject, "xcmsSet"))
112 xcms::sampclass(xset) <- xset@phenoData$sample_group 129 xcms::sampclass(xset) <- xset@phenoData$sample_group
114 } 131 }
115 } 132 }
116 133
117 134
118 loadRData <- function(rdata_path, name) { 135 loadRData <- function(rdata_path, name) {
119 #loads an RData file, and returns the named xset object if it is there 136 # loads an RData file, and returns the named xset object if it is there
120 load(rdata_path) 137 load(rdata_path)
121 return(get(ls()[ls() %in% name])) 138 return(get(ls()[ls() %in% name]))
122 } 139 }
123 140
124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) 141 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata")))
125 142
126 print(xset) 143 print(xset)
127 if (is.null(opt$samplelist)) { 144 if (is.null(opt$samplelist)) {
128 blank_class <- opt$blank_class 145 blank_class <- opt$blank_class
129 }else{ 146 } else {
130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) 147 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE)
131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) 148 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"])
132 149
133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] 150 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class]
134 if (length(chosen_blank) > 1) { 151 if (length(chosen_blank) > 1) {
140 } 157 }
141 158
142 159
143 if (is.null(opt$multilist)) { 160 if (is.null(opt$multilist)) {
144 ffrm_out <- flag_remove(xset, 161 ffrm_out <- flag_remove(xset,
145 pol = opt$polarity, 162 pol = opt$polarity,
146 rsd_i_blank = opt$rsd_i_blank, 163 rsd_i_blank = opt$rsd_i_blank,
147 minfrac_blank = opt$minfrac_blank, 164 minfrac_blank = opt$minfrac_blank,
148 rsd_rt_blank = opt$rsd_rt_blank, 165 rsd_rt_blank = opt$rsd_rt_blank,
149 ithres_blank = opt$ithres_blank, 166 ithres_blank = opt$ithres_blank,
150 s2b = opt$s2b, 167 s2b = opt$s2b,
151 ref.class = blank_class, 168 ref.class = blank_class,
152 egauss_thr = opt$egauss_thr, 169 egauss_thr = opt$egauss_thr,
153 rsd_i_sample = opt$rsd_i_sample, 170 rsd_i_sample = opt$rsd_i_sample,
154 minfrac_sample = opt$minfrac_sample, 171 minfrac_sample = opt$minfrac_sample,
155 rsd_rt_sample = opt$rsd_rt_sample, 172 rsd_rt_sample = opt$rsd_rt_sample,
156 ithres_sample = opt$ithres_sample, 173 ithres_sample = opt$ithres_sample,
157 minfrac_xcms = opt$minfrac_xcms, 174 minfrac_xcms = opt$minfrac_xcms,
158 mzwid = opt$mzwid, 175 mzwid = opt$mzwid,
159 bw = opt$bw, 176 bw = opt$bw,
160 out_dir = opt$out_dir, 177 out_dir = opt$out_dir,
161 temp_save = temp_save, 178 temp_save = temp_save,
162 remove_spectra = remove_spectra, 179 remove_spectra = remove_spectra,
163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) 180 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]
181 )
164 print("flag remove finished") 182 print("flag remove finished")
165 xset <- ffrm_out[[1]] 183 xset <- ffrm_out[[1]]
166 grp_peaklist <- ffrm_out[[2]] 184 grp_peaklist <- ffrm_out[[2]]
167 removed_peaks <- ffrm_out[[3]] 185 removed_peaks <- ffrm_out[[3]]
168 186
170 188
171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) 189 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up)
172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") 190 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv")
173 print(peak_pth) 191 print(peak_pth)
174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), 192 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist),
175 peak_pth, row.names = FALSE, sep = "\t") 193 peak_pth,
194 row.names = FALSE, sep = "\t"
195 )
176 196
177 removed_peaks <- data.frame(removed_peaks) 197 removed_peaks <- data.frame(removed_peaks)
178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), 198 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks),
179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") 199 file.path(opt$out_dir, "removed_peaks.tsv"),
180 200 row.names = FALSE, sep = "\t"
181 }else{ 201 )
182 202 } else {
183 # nolint start 203 # nolint start
184 # TODO 204 # TODO
185 #xsets <- split(xset, multilist_df$multlist) 205 # xsets <- split(xset, multilist_df$multlist)
186 # 206 #
187 #mult_grps <- unique(multilist_df$multlist) 207 # mult_grps <- unique(multilist_df$multlist)
188 # 208 #
189 #for (mgrp in mult_grps){ 209 # for (mgrp in mult_grps){
190 # xset_i <- xsets[mgrp] 210 # xset_i <- xsets[mgrp]
191 # xcms::group(xset_i, 211 # xcms::group(xset_i,
192 # 212 #
193 # } 213 # }
194 # nolint end 214 # nolint end
195 215 }
196
197 }