Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison dimsPredictPuritySingle.R @ 8:efd14b326007 draft
planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 7e1748612a9f9dce11a9e54ff36752b600e7aea3
author | computational-metabolomics |
---|---|
date | Wed, 12 Jun 2024 16:05:01 +0000 |
parents | 2f71b3495221 |
children |
comparison
equal
deleted
inserted
replaced
7:0cc6b67dccb8 | 8:efd14b326007 |
---|---|
1 library(msPurity) | 1 library(msPurity) |
2 library(optparse) | 2 library(optparse) |
3 print(sessionInfo()) | 3 print(sessionInfo()) |
4 | 4 |
5 option_list <- list( | 5 option_list <- list( |
6 make_option(c("--mzML_file"), type = "character"), | 6 make_option(c("--mzML_file"), type = "character"), |
7 make_option(c("--mzML_files"), type = "character"), | 7 make_option(c("--mzML_files"), type = "character"), |
8 make_option(c("--mzML_filename"), type = "character", default = ""), | 8 make_option(c("--mzML_filename"), type = "character", default = ""), |
9 make_option(c("--mzML_galaxy_names"), type = "character", default = ""), | 9 make_option(c("--mzML_galaxy_names"), type = "character", default = ""), |
10 make_option(c("--peaks_file"), type = "character"), | 10 make_option(c("--peaks_file"), type = "character"), |
11 make_option(c("-o", "--out_dir"), type = "character"), | 11 make_option(c("-o", "--out_dir"), type = "character"), |
12 make_option("--minoffset", default = 0.5), | 12 make_option("--minoffset", default = 0.5), |
13 make_option("--maxoffset", default = 0.5), | 13 make_option("--maxoffset", default = 0.5), |
14 make_option("--ilim", default = 0.05), | 14 make_option("--ilim", default = 0.05), |
15 make_option("--ppm", default = 4), | 15 make_option("--ppm", default = 4), |
16 make_option("--dimspy", action = "store_true"), | 16 make_option("--dimspy", action = "store_true"), |
17 make_option("--sim", action = "store_true"), | 17 make_option("--sim", action = "store_true"), |
18 make_option("--remove_nas", action = "store_true"), | 18 make_option("--remove_nas", action = "store_true"), |
19 make_option("--iwNorm", default = "none", type = "character"), | 19 make_option("--iwNorm", default = "none", type = "character"), |
20 make_option("--file_num_dimspy", default = 1), | 20 make_option("--file_num_dimspy", default = 1), |
21 make_option("--exclude_isotopes", action = "store_true"), | 21 make_option("--exclude_isotopes", action = "store_true"), |
22 make_option("--isotope_matrix", type = "character") | 22 make_option("--isotope_matrix", type = "character") |
23 ) | 23 ) |
24 | 24 |
25 # store options | 25 # store options |
26 opt <- parse_args(OptionParser(option_list = option_list)) | 26 opt <- parse_args(OptionParser(option_list = option_list)) |
27 | 27 |
41 mzML_filename <- trimws(mzML_filename) | 41 mzML_filename <- trimws(mzML_filename) |
42 mzML_files <- str_to_vec(mzML_files) | 42 mzML_files <- str_to_vec(mzML_files) |
43 galaxy_names <- str_to_vec(galaxy_names) | 43 galaxy_names <- str_to_vec(galaxy_names) |
44 if (mzML_filename %in% galaxy_names) { | 44 if (mzML_filename %in% galaxy_names) { |
45 return(mzML_files[galaxy_names == mzML_filename]) | 45 return(mzML_files[galaxy_names == mzML_filename]) |
46 }else{ | 46 } else { |
47 stop(paste("mzML file not found - ", mzML_filename)) | 47 stop(paste("mzML file not found - ", mzML_filename)) |
48 } | 48 } |
49 } | 49 } |
50 | 50 |
51 | 51 |
52 if (is.null(opt$dimspy)) { | 52 if (is.null(opt$dimspy)) { |
53 df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") | 53 df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") |
54 if (file.exists(opt$mzML_file)) { | 54 if (file.exists(opt$mzML_file)) { |
55 mzML_file <- opt$mzML_file | 55 mzML_file <- opt$mzML_file |
56 }else if (!is.null(opt$mzML_files)) { | 56 } else if (!is.null(opt$mzML_files)) { |
57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, | 57 mzML_file <- find_mzml_file( |
58 opt$mzML_filename) | 58 opt$mzML_files, opt$mzML_galaxy_names, |
59 }else{ | 59 opt$mzML_filename |
60 ) | |
61 } else { | |
60 mzML_file <- file.path(opt$mzML_file, filename) | 62 mzML_file <- file.path(opt$mzML_file, filename) |
61 } | 63 } |
62 }else{ | 64 } else { |
63 indf <- read.table(opt$peaks_file, | 65 indf <- read.table(opt$peaks_file, |
64 header = TRUE, sep = "\t", stringsAsFactors = FALSE) | 66 header = TRUE, sep = "\t", stringsAsFactors = FALSE |
67 ) | |
65 | 68 |
66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] | 69 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] |
67 print(filename) | 70 print(filename) |
68 # check if the data file is mzML or RAW (can only use mzML currently) so | 71 # check if the data file is mzML or RAW (can only use mzML currently) so |
69 # we expect an mzML file of the same name in the same folder | 72 # we expect an mzML file of the same name in the same folder |
73 filename <- sub("raw", "mzML", filename, ignore.case = TRUE) | 76 filename <- sub("raw", "mzML", filename, ignore.case = TRUE) |
74 print(filename) | 77 print(filename) |
75 | 78 |
76 if (file.exists(opt$mzML_file)) { | 79 if (file.exists(opt$mzML_file)) { |
77 mzML_file <- opt$mzML_file | 80 mzML_file <- opt$mzML_file |
78 }else if (!is.null(opt$mzML_files)) { | 81 } else if (!is.null(opt$mzML_files)) { |
79 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) | 82 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) |
80 }else{ | 83 } else { |
81 mzML_file <- file.path(opt$mzML_file, filename) | 84 mzML_file <- file.path(opt$mzML_file, filename) |
82 } | 85 } |
83 | 86 |
84 # Update the dimspy output with the correct information | 87 # Update the dimspy output with the correct information |
85 df <- indf[4:nrow(indf), ] | 88 df <- indf[4:nrow(indf), ] |
93 } | 96 } |
94 df$mz <- as.numeric(df$mz) | 97 df$mz <- as.numeric(df$mz) |
95 } | 98 } |
96 | 99 |
97 if (!is.null(opt$remove_nas)) { | 100 if (!is.null(opt$remove_nas)) { |
98 df <- df[!is.na(df$mz), ] | 101 df <- df[!is.na(df$mz), ] |
99 } | 102 } |
100 | 103 |
101 if (is.null(opt$isotope_matrix)) { | 104 if (is.null(opt$isotope_matrix)) { |
102 im <- NULL | 105 im <- NULL |
103 }else{ | 106 } else { |
104 im <- read.table(opt$isotope_matrix, | 107 im <- read.table(opt$isotope_matrix, |
105 header = TRUE, sep = "\t", stringsAsFactors = FALSE) | 108 header = TRUE, sep = "\t", stringsAsFactors = FALSE |
109 ) | |
106 } | 110 } |
107 | 111 |
108 if (is.null(opt$exclude_isotopes)) { | 112 if (is.null(opt$exclude_isotopes)) { |
109 isotopes <- FALSE | 113 isotopes <- FALSE |
110 }else{ | 114 } else { |
111 isotopes <- TRUE | 115 isotopes <- TRUE |
112 } | 116 } |
113 | 117 |
114 if (is.null(opt$sim)) { | 118 if (is.null(opt$sim)) { |
115 sim <- FALSE | 119 sim <- FALSE |
116 }else{ | 120 } else { |
117 sim <- TRUE | 121 sim <- TRUE |
118 } | 122 } |
119 | 123 |
120 minOffset <- as.numeric(opt$minoffset) | 124 minOffset <- as.numeric(opt$minoffset) |
121 maxOffset <- as.numeric(opt$maxoffset) | 125 maxOffset <- as.numeric(opt$maxoffset) |
122 | 126 |
123 if (opt$iwNorm == "none") { | 127 if (opt$iwNorm == "none") { |
124 iwNorm <- FALSE | 128 iwNorm <- FALSE |
125 iwNormFun <- NULL | 129 iwNormFun <- NULL |
126 }else if (opt$iwNorm == "gauss") { | 130 } else if (opt$iwNorm == "gauss") { |
127 iwNorm <- TRUE | 131 iwNorm <- TRUE |
128 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) | 132 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) |
129 }else if (opt$iwNorm == "rcosine") { | 133 } else if (opt$iwNorm == "rcosine") { |
130 iwNorm <- TRUE | 134 iwNorm <- TRUE |
131 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) | 135 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) |
132 }else if (opt$iwNorm == "QE5") { | 136 } else if (opt$iwNorm == "QE5") { |
133 iwNorm <- TRUE | 137 iwNorm <- TRUE |
134 iwNormFun <- msPurity::iwNormQE.5() | 138 iwNormFun <- msPurity::iwNormQE.5() |
135 } | 139 } |
136 | 140 |
137 print("FIRST ROWS OF PEAK FILE") | 141 print("FIRST ROWS OF PEAK FILE") |
138 print(head(df)) | 142 print(head(df)) |
139 print(mzML_file) | 143 print(mzML_file) |
140 predicted <- msPurity::dimsPredictPuritySingle(df$mz, | 144 predicted <- msPurity::dimsPredictPuritySingle(df$mz, |
141 filepth = mzML_file, | 145 filepth = mzML_file, |
142 minOffset = minOffset, | 146 minOffset = minOffset, |
143 maxOffset = maxOffset, | 147 maxOffset = maxOffset, |
144 ppm = opt$ppm, | 148 ppm = opt$ppm, |
145 mzML = TRUE, | 149 mzML = TRUE, |
146 sim = sim, | 150 sim = sim, |
147 ilim = opt$ilim, | 151 ilim = opt$ilim, |
148 isotopes = isotopes, | 152 isotopes = isotopes, |
149 im = im, | 153 im = im, |
150 iwNorm = iwNorm, | 154 iwNorm = iwNorm, |
151 iwNormFun = iwNormFun | 155 iwNormFun = iwNormFun |
152 ) | 156 ) |
153 predicted <- cbind(df, predicted) | 157 predicted <- cbind(df, predicted) |
154 | 158 |
155 print(head(predicted)) | 159 print(head(predicted)) |
156 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) | 160 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) |
157 | 161 |
158 write.table(predicted, | 162 write.table(predicted, |
159 file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), | 163 file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), |
160 row.names = FALSE, sep = "\t") | 164 row.names = FALSE, sep = "\t" |
165 ) |