Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison flagRemove.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | 600a866a510c |
children | efd14b326007 |
comparison
equal
deleted
inserted
replaced
5:3ec6fd8e4c17 | 6:2f71b3495221 |
---|---|
1 library(msPurity) | 1 library(msPurity) |
2 library(optparse) | 2 library(optparse) |
3 print(sessionInfo()) | 3 print(sessionInfo()) |
4 option_list <- list( | 4 option_list <- list( |
5 make_option(c("-o", "--out_dir"), type="character", default=getwd(), | 5 make_option(c("-o", "--out_dir"), type = "character", default = getwd(), |
6 help="Output folder for resulting files [default = %default]" | 6 help = "Output folder for resulting files [default = %default]" |
7 ), | 7 ), |
8 make_option(c("-x", "--xset_path"), type="character", default=file.path(getwd(),"xset.rds"), | 8 make_option(c("-x", "--xset_path"), type = "character", default = file.path(getwd(), "xset.rds"), |
9 help="The path to the xcmsSet object [default = %default]" | 9 help = "The path to the xcmsSet object [default = %default]" |
10 ), | 10 ), |
11 make_option("--polarity", default=NA, | 11 make_option("--polarity", default = NA, |
12 help="polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" | 12 help = "polarity (just used for naming purpose for files being saved) [positive, negative, NA] [default %default]" |
13 ), | 13 ), |
14 make_option("--rsd_i_blank", default=100, | 14 make_option("--rsd_i_blank", default = 100, |
15 help="RSD threshold for the blank [default = %default]" | 15 help = "RSD threshold for the blank [default = %default]" |
16 ), | 16 ), |
17 make_option("--minfrac_blank", default=0.5, | 17 make_option("--minfrac_blank", default = 0.5, |
18 help="minimum fraction of files for features needed for the blank [default = %default]" | 18 help = "minimum fraction of files for features needed for the blank [default = %default]" |
19 ), | 19 ), |
20 make_option("--rsd_rt_blank", default=100, | 20 make_option("--rsd_rt_blank", default = 100, |
21 help="RSD threshold for the RT of the blank [default = %default]" | 21 help = "RSD threshold for the RT of the blank [default = %default]" |
22 ), | 22 ), |
23 | 23 |
24 make_option("--ithres_blank", default=0, | 24 make_option("--ithres_blank", default = 0, |
25 help="Intensity threshold for the blank [default = %default]" | 25 help = "Intensity threshold for the blank [default = %default]" |
26 ), | 26 ), |
27 make_option("--s2b", default=10, | 27 make_option("--s2b", default = 10, |
28 help="fold change (sample/blank) needed for sample peak to be allowed. e.g. | 28 help = "fold change (sample/blank) needed for sample peak to be allowed. e.g. |
29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. | 29 if s2b set to 10 and the recorded sample 'intensity' value was 100 and blank was 10. |
30 1000/10 = 100, so sample has fold change higher than the threshold and the peak | 30 1000/10 = 100, so sample has fold change higher than the threshold and the peak |
31 is not considered a blank [default = %default]" | 31 is not considered a blank [default = %default]" |
32 ), | 32 ), |
33 make_option("--blank_class", default='blank', type="character", | 33 make_option("--blank_class", default = "blank", type = "character", |
34 help="A string representing the class that will be used for the blank.[default = %default]" | 34 help = "A string representing the class that will be used for the blank.[default = %default]" |
35 ), | 35 ), |
36 make_option("--egauss_thr", default=NA, | 36 make_option("--egauss_thr", default = NA, |
37 help="Threshold for filtering out non gaussian shaped peaks. Note this only works | 37 help = "Threshold for filtering out non gaussian shaped peaks. Note this only works |
38 if the 'verbose columns' and 'fit gauss' was used with xcms | 38 if the 'verbose columns' and 'fit gauss' was used with xcms |
39 [default = %default]" | 39 [default = %default]" |
40 ), | 40 ), |
41 make_option("--rsd_i_sample", default=100, | 41 make_option("--rsd_i_sample", default = 100, |
42 help="RSD threshold for the samples [default = %default]" | 42 help = "RSD threshold for the samples [default = %default]" |
43 ), | 43 ), |
44 make_option("--minfrac_sample", default=0.8, | 44 make_option("--minfrac_sample", default = 0.8, |
45 help="minimum fraction of files for features needed for the samples [default = %default]" | 45 help = "minimum fraction of files for features needed for the samples [default = %default]" |
46 ), | 46 ), |
47 make_option("--rsd_rt_sample", default=100, | 47 make_option("--rsd_rt_sample", default = 100, |
48 help="RSD threshold for the RT of the samples [default = %default]" | 48 help = "RSD threshold for the RT of the samples [default = %default]" |
49 ), | 49 ), |
50 make_option("--ithres_sample", default=5000, | 50 make_option("--ithres_sample", default = 5000, |
51 help="Intensity threshold for the sample [default = %default]" | 51 help = "Intensity threshold for the sample [default = %default]" |
52 ), | 52 ), |
53 make_option("--grp_rm_ids", default=NA, | 53 make_option("--grp_rm_ids", default = NA, |
54 help="vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) | 54 help = "vector of grouped_xcms peaks to remove (corresponds to the row from xcms::group output) |
55 [default = %default]" | 55 [default = %default]" |
56 ), | 56 ), |
57 make_option("--remove_spectra", action="store_true", | 57 make_option("--remove_spectra", action = "store_true", |
58 help=" TRUE if flagged spectra is to be removed [default = %default]" | 58 help = "TRUE if flagged spectra is to be removed [default = %default]" |
59 ), | 59 ), |
60 make_option("--minfrac_xcms", default=0.5, | 60 make_option("--minfrac_xcms", default = 0.5, |
61 help="minfrac for xcms grouping [default = %default]" | 61 help = "minfrac for xcms grouping [default = %default]" |
62 ), | 62 ), |
63 make_option("--mzwid", default=0.001, | 63 make_option("--mzwid", default = 0.001, |
64 help="mzwid for xcms grouping [default = %default]" | 64 help = "mzwid for xcms grouping [default = %default]" |
65 ), | 65 ), |
66 make_option("--bw", default=5, | 66 make_option("--bw", default = 5, |
67 help="bw for xcms grouping [default = %default]" | 67 help = "bw for xcms grouping [default = %default]" |
68 ), | 68 ), |
69 | 69 |
70 make_option("--temp_save", action="store_true", | 70 make_option("--temp_save", action = "store_true", |
71 help="Assign True if files for each step saved (for testing purposes) [default = %default]" | 71 help = "Assign True if files for each step saved (for testing purposes) [default = %default]" |
72 ), | 72 ), |
73 | 73 |
74 make_option("--samplelist", type="character", help="Sample list to determine the blank class") | 74 make_option("--samplelist", type = "character", help = "Sample list to determine the blank class") |
75 | |
76 | |
77 | |
78 | |
79 | 75 |
80 ) | 76 ) |
81 | 77 |
82 #make_option("--multilist", action="store_true" | 78 # nolint start |
83 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" | 79 # make_option("--multilist", action="store_true" |
84 #), | 80 # help="NOT CURRENTLY IMPLEMENTED: If paired blank removal is to be performed a - multilist - sample list file has to be provided" |
81 # ), | |
82 # nolint end | |
85 | 83 |
86 # store options | 84 # store options |
87 opt<- parse_args(OptionParser(option_list=option_list)) | 85 opt <- parse_args(OptionParser(option_list = option_list)) |
88 | 86 |
89 opt <- replace(opt, opt == "NA", NA) | 87 opt <- replace(opt, opt == "NA", NA) |
90 | 88 |
91 | 89 if (is.null(opt$temp_save)) { |
92 | 90 temp_save <- FALSE |
93 | |
94 if (is.null(opt$temp_save)){ | |
95 temp_save<-FALSE | |
96 }else{ | 91 }else{ |
97 temp_save<-TRUE | 92 temp_save <- TRUE |
98 } | 93 } |
99 | 94 |
100 if (is.null(opt$remove_spectra)){ | 95 if (is.null(opt$remove_spectra)) { |
101 remove_spectra<-FALSE | 96 remove_spectra <- FALSE |
102 }else{ | 97 }else{ |
103 remove_spectra<-TRUE | 98 remove_spectra <- TRUE |
104 } | 99 } |
105 | 100 |
106 | 101 |
107 print(opt) | 102 print(opt) |
108 | 103 |
109 getxcmsSetObject <- function(xobject) { | 104 getxcmsSetObject <- function(xobject) { |
110 # XCMS 1.x | 105 # XCMS 1.x |
111 if (class(xobject) == "xcmsSet") | 106 if (class(xobject) == "xcmsSet") |
112 return (xobject) | 107 return(xobject) |
113 # XCMS 3.x | 108 # XCMS 3.x |
114 if (class(xobject) == "XCMSnExp") { | 109 if (class(xobject) == "XCMSnExp") { |
115 # Get the legacy xcmsSet object | 110 # Get the legacy xcmsSet object |
116 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | 111 suppressWarnings(xset <- as(xobject, "xcmsSet")) |
117 sampclass(xset) <- xset@phenoData$sample_group | 112 xcms::sampclass(xset) <- xset@phenoData$sample_group |
118 return (xset) | 113 return(xset) |
119 } | 114 } |
120 } | 115 } |
121 | 116 |
122 | 117 |
123 loadRData <- function(rdata_path, name){ | 118 loadRData <- function(rdata_path, name) { |
124 #loads an RData file, and returns the named xset object if it is there | 119 #loads an RData file, and returns the named xset object if it is there |
125 load(rdata_path) | 120 load(rdata_path) |
126 return(get(ls()[ls() %in% name])) | 121 return(get(ls()[ls() %in% name])) |
127 } | 122 } |
128 | 123 |
129 xset <- getxcmsSetObject(loadRData(opt$xset_path, c('xset','xdata'))) | 124 xset <- getxcmsSetObject(loadRData(opt$xset_path, c("xset", "xdata"))) |
130 | 125 |
131 print(xset) | 126 print(xset) |
132 if (is.null(opt$samplelist)){ | 127 if (is.null(opt$samplelist)) { |
133 blank_class <- opt$blank_class | 128 blank_class <- opt$blank_class |
134 }else{ | 129 }else{ |
135 samplelist <- read.table(opt$samplelist, sep='\t', header=TRUE) | 130 samplelist <- read.table(opt$samplelist, sep = "\t", header = TRUE) |
136 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank=='yes']) | 131 samplelist_blank <- unique(samplelist$sample_class[samplelist$blank == "yes"]) |
137 | 132 |
138 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] | 133 chosen_blank <- samplelist_blank[samplelist_blank %in% xset@phenoData$class] |
139 if (length(chosen_blank)>1){ | 134 if (length(chosen_blank) > 1) { |
140 print('ERROR: only 1 blank is currently allowed to be used with this tool') | 135 print("ERROR: only 1 blank is currently allowed to be used with this tool") |
141 quit() | 136 quit() |
142 } | 137 } |
143 blank_class <- as.character(chosen_blank) | 138 blank_class <- as.character(chosen_blank) |
144 print(blank_class) | 139 print(blank_class) |
145 } | 140 } |
146 | 141 |
147 | 142 |
148 if (is.null(opt$multilist)){ | 143 if (is.null(opt$multilist)) { |
149 ffrm_out <- flag_remove(xset, | 144 ffrm_out <- flag_remove(xset, |
150 pol=opt$polarity, | 145 pol = opt$polarity, |
151 rsd_i_blank=opt$rsd_i_blank, | 146 rsd_i_blank = opt$rsd_i_blank, |
152 minfrac_blank=opt$minfrac_blank, | 147 minfrac_blank = opt$minfrac_blank, |
153 rsd_rt_blank=opt$rsd_rt_blank, | 148 rsd_rt_blank = opt$rsd_rt_blank, |
154 ithres_blank=opt$ithres_blank, | 149 ithres_blank = opt$ithres_blank, |
155 s2b=opt$s2b, | 150 s2b = opt$s2b, |
156 ref.class=blank_class, | 151 ref.class = blank_class, |
157 egauss_thr=opt$egauss_thr, | 152 egauss_thr = opt$egauss_thr, |
158 rsd_i_sample=opt$rsd_i_sample, | 153 rsd_i_sample = opt$rsd_i_sample, |
159 minfrac_sample=opt$minfrac_sample, | 154 minfrac_sample = opt$minfrac_sample, |
160 rsd_rt_sample=opt$rsd_rt_sample, | 155 rsd_rt_sample = opt$rsd_rt_sample, |
161 ithres_sample=opt$ithres_sample, | 156 ithres_sample = opt$ithres_sample, |
162 minfrac_xcms=opt$minfrac_xcms, | 157 minfrac_xcms = opt$minfrac_xcms, |
163 mzwid=opt$mzwid, | 158 mzwid = opt$mzwid, |
164 bw=opt$bw, | 159 bw = opt$bw, |
165 out_dir=opt$out_dir, | 160 out_dir = opt$out_dir, |
166 temp_save=temp_save, | 161 temp_save = temp_save, |
167 remove_spectra=remove_spectra, | 162 remove_spectra = remove_spectra, |
168 grp_rm_ids=unlist(strsplit(as.character(opt$grp_rm_ids), split=", "))[[1]]) | 163 grp_rm_ids = unlist(strsplit(as.character(opt$grp_rm_ids), split = ", "))[[1]]) |
169 print('flag remove finished') | 164 print("flag remove finished") |
170 xset <- ffrm_out[[1]] | 165 xset <- ffrm_out[[1]] |
171 grp_peaklist <- ffrm_out[[2]] | 166 grp_peaklist <- ffrm_out[[2]] |
172 removed_peaks <- ffrm_out[[3]] | 167 removed_peaks <- ffrm_out[[3]] |
173 | 168 |
174 save.image(file=file.path(opt$out_dir, 'xset_filtered.RData'), version=2) | 169 save.image(file = file.path(opt$out_dir, "xset_filtered.RData"), version = 2) |
175 | 170 |
176 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) | 171 # grpid needed for mspurity ID needed for deconrank... (will clean up at some up) |
177 peak_pth <- file.path(opt$out_dir, 'peaklist_filtered.tsv') | 172 peak_pth <- file.path(opt$out_dir, "peaklist_filtered.tsv") |
178 print(peak_pth) | 173 print(peak_pth) |
179 write.table(data.frame('grpid'=rownames(grp_peaklist), 'ID'=rownames(grp_peaklist), grp_peaklist), | 174 write.table(data.frame("grpid" = rownames(grp_peaklist), "ID" = rownames(grp_peaklist), grp_peaklist), |
180 peak_pth, row.names=FALSE, sep='\t') | 175 peak_pth, row.names = FALSE, sep = "\t") |
181 | 176 |
182 removed_peaks <- data.frame(removed_peaks) | 177 removed_peaks <- data.frame(removed_peaks) |
183 write.table(data.frame('ID'=rownames(removed_peaks),removed_peaks), | 178 write.table(data.frame("ID" = rownames(removed_peaks), removed_peaks), |
184 file.path(opt$out_dir, 'removed_peaks.tsv'), row.names=FALSE, sep='\t') | 179 file.path(opt$out_dir, "removed_peaks.tsv"), row.names = FALSE, sep = "\t") |
185 | 180 |
186 }else{ | 181 }else{ |
187 | 182 |
188 | 183 # nolint start |
189 # TODO | 184 # TODO |
190 #xsets <- split(xset, multilist_df$multlist) | 185 #xsets <- split(xset, multilist_df$multlist) |
191 # | 186 # |
192 #mult_grps <- unique(multilist_df$multlist) | 187 #mult_grps <- unique(multilist_df$multlist) |
193 # | 188 # |
194 #for (mgrp in mult_grps){ | 189 #for (mgrp in mult_grps){ |
195 # xset_i <- xsets[mgrp] | 190 # xset_i <- xsets[mgrp] |
196 # xcms::group(xset_i, | 191 # xcms::group(xset_i, |
197 # | 192 # |
198 # } | 193 # } |
199 | 194 # nolint end |
200 | 195 |
201 | 196 |
202 } | 197 } |
203 | |
204 |