Mercurial > repos > computational-metabolomics > mspurity_combineannotations
comparison flagRemove.R @ 8:94a2b6571758 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
author | computational-metabolomics |
---|---|
date | Wed, 12 Jun 2024 16:06:42 +0000 |
parents | 18c0038dde9c |
children | f11c129a4227 |
comparison
equal
deleted
inserted
replaced
7:d7104112fac6 | 8:94a2b6571758 |
---|---|
1 library(msPurity) | 1 library(msPurity) |
2 library(optparse) | 2 library(optparse) |
3 print(sessionInfo()) | 3 print(sessionInfo()) |
4 option_list <- list( | 4 option_list <- list( |
5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(), | 5 make_option(c("-o", "--out_dir"), |
6 help = "Output folder for resulting files [default = %default]" | 6 type = "character", default = getwd(), |
7 ), | 7 help = "Output folder for resulting files [default = %default]" |
8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), | 8 ), |
9 help = "The path to the xcmsSet object [default = %default]" | 9 make_option(c("-x", "--xset_path"), |
10 ), | 10 type = "character", default = file.path(getwd(), "xset.rds"), |
11 make_option("--polarity", default = NA, | 11 help = "The path to the xcmsSet object [default = %default]" |
12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" | 12 ), |
13 ), | 13 make_option("--polarity", |
14 make_option("--rsd_i_blank", default = 100, | 14 default = NA, |
15 help = "RSD threshold for the blank [default = %default]" | 15 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" |
16 ), | 16 ), |
17 make_option("--minfrac_blank", default = 0.5, | 17 make_option("--rsd_i_blank", |
18 help = "minimum fraction of files for features needed for the blank [default = %default]" | 18 default = 100, |
19 ), | 19 help = "RSD threshold for the blank [default = %default]" |
20 make_option("--rsd_rt_blank", default = 100, | 20 ), |
21 help = "RSD threshold for the RT of the blank [default = %default]" | 21 make_option("--minfrac_blank", |
22 ), | 22 default = 0.5, |
23 | 23 help = "minimum fraction of files for features needed for the blank [default = %default]" |
24 make_option("--ithres_blank", default = 0, | 24 ), |
25 help = "Intensity threshold for the blank [default = %default]" | 25 make_option("--rsd_rt_blank", |
26 ), | 26 default = 100, |
27 make_option("--s2b", default = 10, | 27 help = "RSD threshold for the RT of the blank [default = %default]" |
28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. | 28 ), |
29 make_option("--ithres_blank", | |
30 default = 0, | |
31 help = "Intensity threshold for the blank [default = %default]" | |
32 ), | |
33 make_option("--s2b", | |
34 default = 10, | |
35 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. | |
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. | 36 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. |
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak | 37 1000/10 = 100, so sample has fold change higher than the threshold and the peak |
31 is not considered a blank [default = %default]" | 38 is not considered a blank [default = %default]" |
32 ), | 39 ), |
33 make_option("--blank_class", default = "blank", type = "character", | 40 make_option("--blank_class", |
34 help = "A string representing the class that will be used for the blank.[default = %default]" | 41 default = "blank", type = "character", |
35 ), | 42 help = "A string representing the class that will be used for the blank.[default = %default]" |
36 make_option("--egauss_thr", default = NA, | 43 ), |
37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works | 44 make_option("--egauss_thr", |
45 default = NA, | |
46 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works | |
38 if the 'verbose columns' and 'fit gauss' was used with xcms | 47 if the 'verbose columns' and 'fit gauss' was used with xcms |
39 [default = %default]" | 48 [default = %default]" |
40 ), | 49 ), |
41 make_option("--rsd_i_sample", default = 100, | 50 make_option("--rsd_i_sample", |
42 help = "RSD threshold for the samples [default = %default]" | 51 default = 100, |
43 ), | 52 help = "RSD threshold for the samples [default = %default]" |
44 make_option("--minfrac_sample", default = 0.8, | 53 ), |
45 help = "minimum fraction of files for features needed for the samples [default = %default]" | 54 make_option("--minfrac_sample", |
46 ), | 55 default = 0.8, |
47 make_option("--rsd_rt_sample", default = 100, | 56 help = "minimum fraction of files for features needed for the samples [default = %default]" |
48 help = "RSD threshold for the RT of the samples [default = %default]" | 57 ), |
49 ), | 58 make_option("--rsd_rt_sample", |
50 make_option("--ithres_sample", default = 5000, | 59 default = 100, |
51 help = "Intensity threshold for the sample [default = %default]" | 60 help = "RSD threshold for the RT of the samples [default = %default]" |
52 ), | 61 ), |
53 make_option("--grp_rm_ids", default = NA, | 62 make_option("--ithres_sample", |
54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | 63 default = 5000, |
64 help = "Intensity threshold for the sample [default = %default]" | |
65 ), | |
66 make_option("--grp_rm_ids", | |
67 default = NA, | |
68 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | |
55 [default = %default]" | 69 [default = %default]" |
56 ), | 70 ), |
57 make_option("--remove_spectra", action = "store_true", | 71 make_option("--remove_spectra", |
58 help = "TRUE if flagged spectra is to be removed [default = %default]" | 72 action = "store_true", |
59 ), | 73 help = "TRUE if flagged spectra is to be removed [default = %default]" |
60 make_option("--minfrac_xcms", default = 0.5, | 74 ), |
61 help = "minfrac for xcms grouping [default = %default]" | 75 make_option("--minfrac_xcms", |
62 ), | 76 default = 0.5, |
63 make_option("--mzwid", default = 0.001, | 77 help = "minfrac for xcms grouping [default = %default]" |
64 help = "mzwid for xcms grouping [default = %default]" | 78 ), |
65 ), | 79 make_option("--mzwid", |
66 make_option("--bw", default = 5, | 80 default = 0.001, |
67 help = "bw for xcms grouping [default = %default]" | 81 help = "mzwid for xcms grouping [default = %default]" |
68 ), | 82 ), |
69 | 83 make_option("--bw", |
70 make_option("--temp_save", action = "store_true", | 84 default = 5, |
71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" | 85 help = "bw for xcms grouping [default = %default]" |
72 ), | 86 ), |
73 | 87 make_option("--temp_save", |
74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") | 88 action = "store_true", |
75 | 89 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" |
90 ), | |
91 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") | |
76 ) | 92 ) |
77 | 93 |
78 # nolint start | 94 # nolint start |
79 # make_option("--multilist", action="store_true" | 95 # make_option("--multilist", action="store_true" |
80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" | 96 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" |
86 | 102 |
87 opt <- replace(opt, opt == "NA", NA) | 103 opt <- replace(opt, opt == "NA", NA) |
88 | 104 |
89 if (is.null(opt$temp_save)) { | 105 if (is.null(opt$temp_save)) { |
90 temp_save <- FALSE | 106 temp_save <- FALSE |
91 }else{ | 107 } else { |
92 temp_save <- TRUE | 108 temp_save <- TRUE |
93 } | 109 } |
94 | 110 |
95 if (is.null(opt$remove_spectra)) { | 111 if (is.null(opt$remove_spectra)) { |
96 remove_spectra <- FALSE | 112 remove_spectra <- FALSE |
97 }else{ | 113 } else { |
98 remove_spectra <- TRUE | 114 remove_spectra <- TRUE |
99 } | 115 } |
100 | 116 |
101 | 117 |
102 print(opt) | 118 print(opt) |
103 | 119 |
104 getxcmsSetObject <- function(xobject) { | 120 getxcmsSetObject <- function(xobject) { |
105 # XCMS 1.x | 121 # XCMS 1.x |
106 if (class(xobject) == "xcmsSet") | 122 if (class(xobject) == "xcmsSet") { |
107 return(xobject) | 123 return(xobject) |
124 } | |
108 # XCMS 3.x | 125 # XCMS 3.x |
109 if (class(xobject) == "XCMSnExp") { | 126 if (class(xobject) == "XCMSnExp") { |
110 # Get the legacy xcmsSet object | 127 # Get the legacy xcmsSet object |
111 suppressWarnings(xset <- as(xobject, "xcmsSet")) | 128 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
112 xcms::sampclass(xset) <- xset@phenoData$sample_group | 129 xcms::sampclass(xset) <- xset@phenoData$sample_group |
114 } | 131 } |
115 } | 132 } |
116 | 133 |
117 | 134 |
118 loadRData <- function(rdata_path, name) { | 135 loadRData <- function(rdata_path, name) { |
119 #loads an RData file, and returns the named xset object if it is there | 136 # loads an RData file, and returns the named xset object if it is there |
120 load(rdata_path) | 137 load(rdata_path) |
121 return(get(ls()[ls() %in% name])) | 138 return(get(ls()[ls() %in% name])) |
122 } | 139 } |
123 | 140 |
124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) | 141 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) |
125 | 142 |
126 print(xset) | 143 print(xset) |
127 if (is.null(opt$samplelist)) { | 144 if (is.null(opt$samplelist)) { |
128 blank_class <- opt$blank_class | 145 blank_class <- opt$blank_class |
129 }else{ | 146 } else { |
130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) | 147 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) |
131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) | 148 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) |
132 | 149 |
133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] | 150 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] |
134 if (length(chosen_blank) > 1) { | 151 if (length(chosen_blank) > 1) { |
140 } | 157 } |
141 | 158 |
142 | 159 |
143 if (is.null(opt$multilist)) { | 160 if (is.null(opt$multilist)) { |
144 ffrm_out <- flag_remove(xset, | 161 ffrm_out <- flag_remove(xset, |
145 pol = opt$polarity, | 162 pol = opt$polarity, |
146 rsd_i_blank = opt$rsd_i_blank, | 163 rsd_i_blank = opt$rsd_i_blank, |
147 minfrac_blank = opt$minfrac_blank, | 164 minfrac_blank = opt$minfrac_blank, |
148 rsd_rt_blank = opt$rsd_rt_blank, | 165 rsd_rt_blank = opt$rsd_rt_blank, |
149 ithres_blank = opt$ithres_blank, | 166 ithres_blank = opt$ithres_blank, |
150 s2b = opt$s2b, | 167 s2b = opt$s2b, |
151 ref.class = blank_class, | 168 ref.class = blank_class, |
152 egauss_thr = opt$egauss_thr, | 169 egauss_thr = opt$egauss_thr, |
153 rsd_i_sample = opt$rsd_i_sample, | 170 rsd_i_sample = opt$rsd_i_sample, |
154 minfrac_sample = opt$minfrac_sample, | 171 minfrac_sample = opt$minfrac_sample, |
155 rsd_rt_sample = opt$rsd_rt_sample, | 172 rsd_rt_sample = opt$rsd_rt_sample, |
156 ithres_sample = opt$ithres_sample, | 173 ithres_sample = opt$ithres_sample, |
157 minfrac_xcms = opt$minfrac_xcms, | 174 minfrac_xcms = opt$minfrac_xcms, |
158 mzwid = opt$mzwid, | 175 mzwid = opt$mzwid, |
159 bw = opt$bw, | 176 bw = opt$bw, |
160 out_dir = opt$out_dir, | 177 out_dir = opt$out_dir, |
161 temp_save = temp_save, | 178 temp_save = temp_save, |
162 remove_spectra = remove_spectra, | 179 remove_spectra = remove_spectra, |
163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) | 180 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]] |
181 ) | |
164 print("flag remove finished") | 182 print("flag remove finished") |
165 xset <- ffrm_out[[1]] | 183 xset <- ffrm_out[[1]] |
166 grp_peaklist <- ffrm_out[[2]] | 184 grp_peaklist <- ffrm_out[[2]] |
167 removed_peaks <- ffrm_out[[3]] | 185 removed_peaks <- ffrm_out[[3]] |
168 | 186 |
170 | 188 |
171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) | 189 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) |
172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") | 190 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") |
173 print(peak_pth) | 191 print(peak_pth) |
174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), | 192 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), |
175 peak_pth, row.names = FALSE, sep = "\t") | 193 peak_pth, |
194 row.names = FALSE, sep = "\t" | |
195 ) | |
176 | 196 |
177 removed_peaks <- data.frame(removed_peaks) | 197 removed_peaks <- data.frame(removed_peaks) |
178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), | 198 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), |
179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") | 199 file.path(opt$out_dir, "removed_peaks.tsv"), |
180 | 200 row.names = FALSE, sep = "\t" |
181 }else{ | 201 ) |
182 | 202 } else { |
183 # nolint start | 203 # nolint start |
184 # TODO | 204 # TODO |
185 #xsets <- split(xset, multilist_df$multlist) | 205 # xsets <- split(xset, multilist_df$multlist) |
186 # | 206 # |
187 #mult_grps <- unique(multilist_df$multlist) | 207 # mult_grps <- unique(multilist_df$multlist) |
188 # | 208 # |
189 #for (mgrp in mult_grps){ | 209 # for (mgrp in mult_grps){ |
190 # xset_i <- xsets[mgrp] | 210 # xset_i <- xsets[mgrp] |
191 # xcms::group(xset_i, | 211 # xcms::group(xset_i, |
192 # | 212 # |
193 # } | 213 # } |
194 # nolint end | 214 # nolint end |
195 | 215 } |
196 | |
197 } |