mspurity_createdatabase: dimsPredictPuritySingle.R comparison

comparison dimsPredictPuritySingle.R @ 6:2f71b3495221 draft

"planemo upload for repository https://github.com/computational-metabolomics/mspurity-galaxy commit 2579c8746819670348c378f86116f83703c493eb"

author	computational-metabolomics
date	Thu, 04 Mar 2021 12:27:21 +0000
parents	f52287a06c02
children	efd14b326007

comparison

Legend: equal, deleted, inserted, replaced

-:3ec6fd8e4c17
+:2f71b3495221
 library(msPurity)
 library(optparse)
 print(sessionInfo())
 option_list <- list(
-make_option(c("--mzML_file"), type="character"),
+make_option(c("--mzML_file"), type = "character"),
-make_option(c("--mzML_files"), type="character"),
+make_option(c("--mzML_files"), type = "character"),
-make_option(c("--mzML_filename"), type="character", default=''),
+make_option(c("--mzML_filename"), type = "character", default = ""),
-make_option(c("--mzML_galaxy_names"), type="character", default=''),
+make_option(c("--mzML_galaxy_names"), type = "character", default = ""),
-make_option(c("--peaks_file"), type="character"),
+make_option(c("--peaks_file"), type = "character"),
-make_option(c("-o", "--out_dir"), type="character"),
+make_option(c("-o", "--out_dir"), type = "character"),
-make_option("--minoffset", default=0.5),
+make_option("--minoffset", default = 0.5),
-make_option("--maxoffset", default=0.5),
+make_option("--maxoffset", default = 0.5),
-make_option("--ilim", default=0.05),
+make_option("--ilim", default = 0.05),
-make_option("--ppm", default=4),
+make_option("--ppm", default = 4),
-make_option("--dimspy", action="store_true"),
+make_option("--dimspy", action = "store_true"),
-make_option("--sim", action="store_true"),
+make_option("--sim", action = "store_true"),
-make_option("--remove_nas", action="store_true"),
+make_option("--remove_nas", action = "store_true"),
-make_option("--iwNorm", default="none", type="character"),
+make_option("--iwNorm", default = "none", type = "character"),
-make_option("--file_num_dimspy", default=1),
+make_option("--file_num_dimspy", default = 1),
-make_option("--exclude_isotopes", action="store_true"),
+make_option("--exclude_isotopes", action = "store_true"),
-make_option("--isotope_matrix", type="character")
+make_option("--isotope_matrix", type = "character")
 )
 # store options
-opt<- parse_args(OptionParser(option_list=option_list))
+opt <- parse_args(OptionParser(option_list = option_list))
 print(sessionInfo())
 print(opt)
 print(opt$mzML_files)
 print(opt$mzML_galaxy_names)
-str_to_vec <- function(x){
+str_to_vec <- function(x) {
 print(x)
-x <- trimws(strsplit(x, ',')[[1]])
+x <- trimws(strsplit(x, ",")[[1]])
 return(x[x != ""])
 }
-find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename){
+find_mzml_file <- function(mzML_files, galaxy_names, mzML_filename) {
 mzML_filename <- trimws(mzML_filename)
 mzML_files <- str_to_vec(mzML_files)
 galaxy_names <- str_to_vec(galaxy_names)
-if (mzML_filename %in% galaxy_names){
+if (mzML_filename %in% galaxy_names) {
-return(mzML_files[galaxy_names==mzML_filename])
+return(mzML_files[galaxy_names == mzML_filename])
 }else{
 stop(paste("mzML file not found - ", mzML_filename))
 }
 }
-if (is.null(opt$dimspy)){
+if (is.null(opt$dimspy)) {
-df <- read.table(opt$peaks_file, header = TRUE, sep='\t')
+df <- read.table(opt$peaks_file, header = TRUE, sep = "\t")
-if (file.exists(opt$mzML_file)){
+if (file.exists(opt$mzML_file)) {
 mzML_file <- opt$mzML_file
-}else if (!is.null(opt$mzML_files)){
+}else if (!is.null(opt$mzML_files)) {
 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names,
 opt$mzML_filename)
 }else{
 mzML_file <- file.path(opt$mzML_file, filename)
 }
 }else{
 indf <- read.table(opt$peaks_file,
-header = TRUE, sep='\t', stringsAsFactors = FALSE)
+header = TRUE, sep = "\t", stringsAsFactors = FALSE)
 filename <- colnames(indf)[8:ncol(indf)][opt$file_num_dimspy]
 print(filename)
 # check if the data file is mzML or RAW (can only use mzML currently) so
 # we expect an mzML file of the same name in the same folder
-indf$i <- indf[,colnames(indf)==filename]
+indf$i <- indf[, colnames(indf) == filename]
-indf[,colnames(indf)==filename] <- as.numeric(indf[,colnames(indf)==filename])
+indf[, colnames(indf) == filename] <- as.numeric(indf[, colnames(indf) == filename])
-filename = sub("raw", "mzML", filename, ignore.case = TRUE)
+filename <- sub("raw", "mzML", filename, ignore.case = TRUE)
 print(filename)
+if (file.exists(opt$mzML_file)) {
-if (file.exists(opt$mzML_file)){
 mzML_file <- opt$mzML_file
-}else if (!is.null(opt$mzML_files)){
+}else if (!is.null(opt$mzML_files)) {
 mzML_file <- find_mzml_file(opt$mzML_files, opt$mzML_galaxy_names, filename)
 }else{
 mzML_file <- file.path(opt$mzML_file, filename)
-}
-# Update the dimspy output with the correct information
-df <- indf[4:nrow(indf),]
-if ('blank_flag' %in% colnames(df)){
-df <- df[df$blank_flag==1,]
 }
-colnames(df)[colnames(df)=='m.z'] <- 'mz'
+# Update the dimspy output with the correct information
-if ('nan' %in% df$mz){
+df <- indf[4:nrow(indf), ]
-df[df$mz=='nan',]$mz <- NA
+if ("blank_flag" %in% colnames(df)) {
+df <- df[df$blank_flag == 1, ]
+}
+colnames(df)[colnames(df) == "m.z"] <- "mz"
+if ("nan" %in% df$mz) {
+df[df$mz == "nan", ]$mz <- NA
 }
 df$mz <- as.numeric(df$mz)
 }
-if (!is.null(opt$remove_nas)){
+if (!is.null(opt$remove_nas)) {
-df <- df[!is.na(df$mz),]
+df <- df[!is.na(df$mz), ]
 }
-if (is.null(opt$isotope_matrix)){
+if (is.null(opt$isotope_matrix)) {
 im <- NULL
 }else{
 im <- read.table(opt$isotope_matrix,
-header = TRUE, sep='\t', stringsAsFactors = FALSE)
+header = TRUE, sep = "\t", stringsAsFactors = FALSE)
 }
-if (is.null(opt$exclude_isotopes)){
+if (is.null(opt$exclude_isotopes)) {
 isotopes <- FALSE
 }else{
 isotopes <- TRUE
 }
-if (is.null(opt$sim)){
+if (is.null(opt$sim)) {
-sim=FALSE
+sim <- FALSE
 }else{
-sim=TRUE
+sim <- TRUE
 }
-minOffset = as.numeric(opt$minoffset)
+minOffset <- as.numeric(opt$minoffset)
-maxOffset = as.numeric(opt$maxoffset)
+maxOffset <- as.numeric(opt$maxoffset)
-if (opt$iwNorm=='none'){
+if (opt$iwNorm == "none") {
-iwNorm = FALSE
+iwNorm <- FALSE
-iwNormFun = NULL
+iwNormFun <- NULL
-}else if (opt$iwNorm=='gauss'){
+}else if (opt$iwNorm == "gauss") {
-iwNorm = TRUE
+iwNorm <- TRUE
-iwNormFun = msPurity::iwNormGauss(minOff=-minOffset, maxOff=maxOffset)
+iwNormFun <- msPurity::iwNormGauss(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm=='rcosine'){
+}else if (opt$iwNorm == "rcosine") {
-iwNorm = TRUE
+iwNorm <- TRUE
-iwNormFun = msPurity::iwNormRcosine(minOff=-minOffset, maxOff=maxOffset)
+iwNormFun <- msPurity::iwNormRcosine(minOff = -minOffset, maxOff = maxOffset)
-}else if (opt$iwNorm=='QE5'){
+}else if (opt$iwNorm == "QE5") {
-iwNorm = TRUE
+iwNorm <- TRUE
-iwNormFun = msPurity::iwNormQE.5()
+iwNormFun <- msPurity::iwNormQE.5()
 }
-print('FIRST ROWS OF PEAK FILE')
+print("FIRST ROWS OF PEAK FILE")
 print(head(df))
 print(mzML_file)
 predicted <- msPurity::dimsPredictPuritySingle(df$mz,
-filepth=mzML_file,
+filepth = mzML_file,
-minOffset=minOffset,
+minOffset = minOffset,
-maxOffset=maxOffset,
+maxOffset = maxOffset,
-ppm=opt$ppm,
+ppm = opt$ppm,
-mzML=TRUE,
+mzML = TRUE,
 sim = sim,
 ilim = opt$ilim,
 isotopes = isotopes,
 im = im,
 iwNorm = iwNorm,
 iwNormFun = iwNormFun
 )
 predicted <- cbind(df, predicted)
 print(head(predicted))
-print(file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'))
+print(file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"))
 write.table(predicted,
-file.path(opt$out_dir, 'dimsPredictPuritySingle_output.tsv'),
+file.path(opt$out_dir, "dimsPredictPuritySingle_output.tsv"),
-row.names=FALSE, sep='\t')
+row.names = FALSE, sep = "\t")

Mercurial > repos > computational-metabolomics > mspurity_createdatabase

comparison dimsPredictPuritySingle.R @ 6:2f71b3495221 draft