Mercurial > repos > computational-metabolomics > mspurity_createdatabase
comparison dimsPredictPuritySingle.R @ 6:2f71b3495221 draft
"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"
author | computational-metabolomics |
---|---|
date | Thu, 04 Mar 2021 12:27:21 +0000 |
parents | f52287a06c02 |
children | efd14b326007 |
comparison
equal
deleted
inserted
replaced
5:3ec6fd8e4c17 | 6:2f71b3495221 |
---|---|
1 library(msPurity) | 1 library(msPurity) |
2 library(optparse) | 2 library(optparse) |
3 print(sessionInfo()) | 3 print(sessionInfo()) |
4 | 4 |
5 option_list <- list( | 5 option_list <- list( |
6 make_option(c("--mzML_file"), type="character"), | 6 make_option(c("--mzML_file"), type = "character"), |
7 make_option(c("--mzML_files"), type="character"), | 7 make_option(c("--mzML_files"), type = "character"), |
8 make_option(c("--mzML_filename"), type="character", default=''), | 8 make_option(c("--mzML_filename"), type = "character", default = ""), |
9 make_option(c("--mzML_galaxy_names"), type="character", default=''), | 9 make_option(c("--mzML_galaxy_names"), type = "character", default = ""), |
10 make_option(c("--peaks_file"), type="character"), | 10 make_option(c("--peaks_file"), type = "character"), |
11 make_option(c("-o", "--out_dir"), type="character"), | 11 make_option(c("-o", "--out_dir"), type = "character"), |
12 make_option("--minoffset", default=0.5), | 12 make_option("--minoffset", default = 0.5), |
13 make_option("--maxoffset", default=0.5), | 13 make_option("--maxoffset", default = 0.5), |
14 make_option("--ilim", default=0.05), | 14 make_option("--ilim", default = 0.05), |
15 make_option("--ppm", default=4), | 15 make_option("--ppm", default = 4), |
16 make_option("--dimspy", action="store_true"), | 16 make_option("--dimspy", action = "store_true"), |
17 make_option("--sim", action="store_true"), | 17 make_option("--sim", action = "store_true"), |
18 make_option("--remove_nas", action="store_true"), | 18 make_option("--remove_nas", action = "store_true"), |
19 make_option("--iwNorm", default="none", type="character"), | 19 make_option("--iwNorm", default = "none", type = "character"), |
20 make_option("--file_num_dimspy", default=1), | 20 make_option("--file_num_dimspy", default = 1), |
21 make_option("--exclude_isotopes", action="store_true"), | 21 make_option("--exclude_isotopes", action = "store_true"), |
22 make_option("--isotope_matrix", type="character") | 22 make_option("--isotope_matrix", type = "character") |
23 ) | 23 ) |
24 | 24 |
25 # store options | 25 # store options |
26 opt<- parse_args(OptionParser(option_list=option_list)) | 26 opt <- parse_args(OptionParser(option_list = option_list)) |
27 | 27 |
28 print(sessionInfo()) | 28 print(sessionInfo()) |
29 print(opt) | 29 print(opt) |
30 | 30 |
31 print(opt$mzML_files) | 31 print(opt$mzML_files) |
32 print(opt$mzML_galaxy_names) | 32 print(opt$mzML_galaxy_names) |
33 | 33 |
34 str_to_vec <- function(x){ | 34 str_to_vec <- function(x) { |
35 print(x) | 35 print(x) |
36 x <- trimws(strsplit(x, ',')[[1]]) | 36 x <- trimws(strsplit(x, ",")[[1]]) |
37 return(x[x != ""]) | 37 return(x[x != ""]) |
38 } | 38 } |
39 | 39 |
40 find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){ | 40 find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) { |
41 mzML_filename <- trimws(mzML_filename) | 41 mzML_filename <- trimws(mzML_filename) |
42 mzML_files <- str_to_vec(mzML_files) | 42 mzML_files <- str_to_vec(mzML_files) |
43 galaxy_names <- str_to_vec(galaxy_names) | 43 galaxy_names <- str_to_vec(galaxy_names) |
44 if (mzML_filename %in% galaxy_names){ | 44 if (mzML_filename %in% galaxy_names) { |
45 return(mzML_files[galaxy_names==mzML_filename]) | 45 return(mzML_files[galaxy_names == mzML_filename]) |
46 }else{ | 46 }else{ |
47 stop(paste("mzML file not found - ", mzML_filename)) | 47 stop(paste("mzML file not found - ", mzML_filename)) |
48 } | 48 } |
49 } | 49 } |
50 | 50 |
51 | 51 |
52 if (is.null(opt$dimspy)){ | 52 if (is.null(opt$dimspy)) { |
53 df <- read.table(opt$peaks_file, header = TRUE, sep='\t') | 53 df <- read.table(opt$peaks_file, header = TRUE, sep = "\t") |
54 if (file.exists(opt$mzML_file)){ | 54 if (file.exists(opt$mzML_file)) { |
55 mzML_file <- opt$mzML_file | 55 mzML_file <- opt$mzML_file |
56 }else if (!is.null(opt$mzML_files)){ | 56 }else if (!is.null(opt$mzML_files)) { |
57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, | 57 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, |
58 opt$mzML_filename) | 58 opt$mzML_filename) |
59 }else{ | 59 }else{ |
60 mzML_file <- file.path(opt$mzML_file, filename) | 60 mzML_file <- file.path(opt$mzML_file, filename) |
61 } | 61 } |
62 }else{ | 62 }else{ |
63 indf <- read.table(opt$peaks_file, | 63 indf <- read.table(opt$peaks_file, |
64 header = TRUE, sep='\t', stringsAsFactors = FALSE) | 64 header = TRUE, sep = "\t", stringsAsFactors = FALSE) |
65 | 65 |
66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] | 66 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy] |
67 print(filename) | 67 print(filename) |
68 # check if the data file is mzML or RAW (can only use mzML currently) so | 68 # check if the data file is mzML or RAW (can only use mzML currently) so |
69 # we expect an mzML file of the same name in the same folder | 69 # we expect an mzML file of the same name in the same folder |
70 indf$i <- indf[,colnames(indf)==filename] | 70 indf$i <- indf[, colnames(indf) == filename] |
71 indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename]) | 71 indf[, colnames(indf) == filename] <- as.numeric(indf[, colnames(indf) == filename]) |
72 | 72 |
73 filename = sub("raw", "mzML", filename, ignore.case = TRUE) | 73 filename <- sub("raw", "mzML", filename, ignore.case = TRUE) |
74 print(filename) | 74 print(filename) |
75 | 75 |
76 | 76 if (file.exists(opt$mzML_file)) { |
77 if (file.exists(opt$mzML_file)){ | |
78 mzML_file <- opt$mzML_file | 77 mzML_file <- opt$mzML_file |
79 }else if (!is.null(opt$mzML_files)){ | 78 }else if (!is.null(opt$mzML_files)) { |
80 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) | 79 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename) |
81 }else{ | 80 }else{ |
82 mzML_file <- file.path(opt$mzML_file, filename) | 81 mzML_file <- file.path(opt$mzML_file, filename) |
83 } | |
84 | |
85 # Update the dimspy output with the correct information | |
86 df <- indf[4:nrow(indf),] | |
87 if ('blank_flag' %in% colnames(df)){ | |
88 df <- df[df$blank_flag==1,] | |
89 } | 82 } |
90 colnames(df)[colnames(df)=='m.z'] <- 'mz' | 83 |
91 | 84 # Update the dimspy output with the correct information |
92 if ('nan' %in% df$mz){ | 85 df <- indf[4:nrow(indf), ] |
93 df[df$mz=='nan',]$mz <- NA | 86 if ("blank_flag" %in% colnames(df)) { |
87 df <- df[df$blank_flag == 1, ] | |
88 } | |
89 colnames(df)[colnames(df) == "m.z"] <- "mz" | |
90 | |
91 if ("nan" %in% df$mz) { | |
92 df[df$mz == "nan", ]$mz <- NA | |
94 } | 93 } |
95 df$mz <- as.numeric(df$mz) | 94 df$mz <- as.numeric(df$mz) |
96 } | 95 } |
97 | 96 |
98 if (!is.null(opt$remove_nas)){ | 97 if (!is.null(opt$remove_nas)) { |
99 df <- df[!is.na(df$mz),] | 98 df <- df[!is.na(df$mz), ] |
100 } | 99 } |
101 | 100 |
102 if (is.null(opt$isotope_matrix)){ | 101 if (is.null(opt$isotope_matrix)) { |
103 im <- NULL | 102 im <- NULL |
104 }else{ | 103 }else{ |
105 im <- read.table(opt$isotope_matrix, | 104 im <- read.table(opt$isotope_matrix, |
106 header = TRUE, sep='\t', stringsAsFactors = FALSE) | 105 header = TRUE, sep = "\t", stringsAsFactors = FALSE) |
107 } | 106 } |
108 | 107 |
109 if (is.null(opt$exclude_isotopes)){ | 108 if (is.null(opt$exclude_isotopes)) { |
110 isotopes <- FALSE | 109 isotopes <- FALSE |
111 }else{ | 110 }else{ |
112 isotopes <- TRUE | 111 isotopes <- TRUE |
113 } | 112 } |
114 | 113 |
115 if (is.null(opt$sim)){ | 114 if (is.null(opt$sim)) { |
116 sim=FALSE | 115 sim <- FALSE |
117 }else{ | 116 }else{ |
118 sim=TRUE | 117 sim <- TRUE |
119 } | 118 } |
120 | 119 |
121 minOffset = as.numeric(opt$minoffset) | 120 minOffset <- as.numeric(opt$minoffset) |
122 maxOffset = as.numeric(opt$maxoffset) | 121 maxOffset <- as.numeric(opt$maxoffset) |
123 | 122 |
124 if (opt$iwNorm=='none'){ | 123 if (opt$iwNorm == "none") { |
125 iwNorm = FALSE | 124 iwNorm <- FALSE |
126 iwNormFun = NULL | 125 iwNormFun <- NULL |
127 }else if (opt$iwNorm=='gauss'){ | 126 }else if (opt$iwNorm == "gauss") { |
128 iwNorm = TRUE | 127 iwNorm <- TRUE |
129 iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset) | 128 iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset) |
130 }else if (opt$iwNorm=='rcosine'){ | 129 }else if (opt$iwNorm == "rcosine") { |
131 iwNorm = TRUE | 130 iwNorm <- TRUE |
132 iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset) | 131 iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset) |
133 }else if (opt$iwNorm=='QE5'){ | 132 }else if (opt$iwNorm == "QE5") { |
134 iwNorm = TRUE | 133 iwNorm <- TRUE |
135 iwNormFun = msPurity::iwNormQE.5() | 134 iwNormFun <- msPurity::iwNormQE.5() |
136 } | 135 } |
137 | 136 |
138 print('FIRST ROWS OF PEAK FILE') | 137 print("FIRST ROWS OF PEAK FILE") |
139 print(head(df)) | 138 print(head(df)) |
140 print(mzML_file) | 139 print(mzML_file) |
141 predicted <- msPurity::dimsPredictPuritySingle(df$mz, | 140 predicted <- msPurity::dimsPredictPuritySingle(df$mz, |
142 filepth=mzML_file, | 141 filepth = mzML_file, |
143 minOffset=minOffset, | 142 minOffset = minOffset, |
144 maxOffset=maxOffset, | 143 maxOffset = maxOffset, |
145 ppm=opt$ppm, | 144 ppm = opt$ppm, |
146 mzML=TRUE, | 145 mzML = TRUE, |
147 sim = sim, | 146 sim = sim, |
148 ilim = opt$ilim, | 147 ilim = opt$ilim, |
149 isotopes = isotopes, | 148 isotopes = isotopes, |
150 im = im, | 149 im = im, |
151 iwNorm = iwNorm, | 150 iwNorm = iwNorm, |
152 iwNormFun = iwNormFun | 151 iwNormFun = iwNormFun |
153 ) | 152 ) |
154 predicted <- cbind(df, predicted) | 153 predicted <- cbind(df, predicted) |
155 | 154 |
156 print(head(predicted)) | 155 print(head(predicted)) |
157 print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv')) | 156 print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv")) |
158 | 157 |
159 write.table(predicted, | 158 write.table(predicted, |
160 file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'), | 159 file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"), |
161 row.names=FALSE, sep='\t') | 160 row.names = FALSE, sep = "\t") |
162 | |
163 |