Mercurial > repos > azomics > metacyto_search_cl
diff metacyto_search_clr.R @ 0:94ac403d134a draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_search_clr commit a1b796a09f6b30919a73b5ded0ce5a6378317007"
author | azomics |
---|---|
date | Wed, 28 Jul 2021 22:02:38 +0000 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metacyto_search_clr.R Wed Jul 28 22:02:38 2021 +0000 @@ -0,0 +1,191 @@ +#!/usr/bin/env Rscript +###################################################################### +# Copyright (c) 2018 Northrop Grumman. +# All rights reserved. +###################################################################### +# +# Version 1 - January 2018 +# Author: Cristel Thomas +# +# + +library(flowCore) +library(MetaCyto) + +check_cluster_def <- function(cl_def) { + if (cl_def == "" || cl_def == "None") { + quit(save = "no", status = 14, runLast = FALSE) + } else { + tmp <- gsub(" ", "", cl_def, fixed = TRUE) + clean_def <- gsub(",", "|", tmp, fixed = TRUE) + return(toupper(clean_def)) + } +} + +path_to_group_file <- function(path_to_result) { + grp <- basename(dirname(path_to_result)) + return(paste(grp, "fcs", sep = ".", collapse = NULL)) +} + +group_file_to_group_name <- function(result_file) { + return(strsplit(result_file, ".", fixed = TRUE)[[1]][1]) +} + + +search_cluster_panels <- function(df, fcspaths, fcsnames, outdir="", uc="", + clusters=vector()) { + + working_dir <- "tmp_metacyto" + working_out <- "tmp_metacyto_out" + dir.create(working_dir) + dir.create(outdir) + + # reformat summary -- expects csv + 'fcs_names' && 'fcs_files' + new_df <- file.path(working_dir, "processed_sample_summary.csv") + df$fcs_names <- df$filenames + df$fcs_files <- df$filenames + write.csv(df, file = new_df, row.names = F) + + # move && rename FCS files to same directory + for (i in seq_len(length(fcspaths))) { + new_file <- file.path(working_dir, fcsnames[[i]]) + file.copy(fcspaths[[i]], new_file) + } + + searchCluster.batch(preprocessOutputFolder = working_dir, + outpath = working_out, + clusterLabel = clusters) + + result_files <- list.files(working_out, + pattern = "cluster_stats_in_each_sample", + recursive = T, + full.names = T) + + nb_groups <- length(fcsnames) + no_results <- vector() + if (length(result_files) != nb_groups) { + groups_with_results <- sapply(result_files, path_to_group_file) + ## one or more groups with no results, figure out which + no_results <- setdiff(fcsnames, groups_with_results) + } + + if (length(no_results) == nb_groups) { + sink(uc) + cat("No clusters were found in none of the groups.") + sink() + } else { + unused_clrs <- list() + + if (length(no_results > 0)) { + grp_no_results <- sapply(no_results, group_file_to_group_name) + unused_clrs <- data.frame("cluster_label" = "any", "not_found_in" = grp_no_results) + } + + for (result in result_files) { + group_name <- strsplit(result, .Platform$file.sep)[[1]][2] + new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_") + new_path <- file.path(outdir, new_filename) + tmp_df <- read.csv(result) + + used_clr <- as.character(unique(tmp_df$label)) + if (length(used_clr) != length(clusters)) { + unused <- setdiff(clusters, used_clr) + tmp_udf <- data.frame("cluster_label" = unused, "not_found_in" = group_name) + unused_clrs <- rbind(unused_clrs, tmp_udf) + } + colnames(tmp_df)[[1]] <- "group_name" + write.table(tmp_df, new_path, quote = F, row.names = F, col.names = T, sep = "\t") + } + + if (is.null(dim(unused_clrs))) { + sink(uc) + cat("All provided cluster definition were found in all provided FCS files.") + sink() + } else { + write.table(unused_clrs, uc, quote = F, row.names = F, col.names = T, sep = "\t") + } + } +} + + +check_input <- function(report="", outdir="", list_unused="", list_clusters="", + fcs_files=list(), grp_names=list(), clusters=vector()) { + # check FCS files + fcspaths <- unlist(fcs_files) + fcsnames <- unlist(grp_names) + ct_files <- 0 + some_pb <- FALSE + for (i in seq_len(length(fcspaths))) { + is_file_valid <- FALSE + tryCatch({ + fcs <- read.FCS(fcspaths[[i]], transformation = FALSE) + is_file_valid <- TRUE + }, error = function(ex) { + print(paste("File is not a valid FCS file:", fcsnames[[i]], ex)) + }) + if (is_file_valid) { + metacyto_pp_check <- if ("sample_id" %in% colnames(fcs)) TRUE else FALSE + if (metacyto_pp_check) { + idx <- length(colnames(fcs)) + ct_files <- ct_files + max(fcs@exprs[, idx]) + } else { + quit(save = "no", status = 11, runLast = FALSE) + } + } else { + some_pb <- TRUE + } + } + # check summary file format + df <- read.table(report, sep = "\t", header = T, colClasses = "character") + nm <- colnames(df) + check_ab <- if ("antibodies" %in% nm) TRUE else FALSE + check_sdy <- if ("study_id" %in% nm) TRUE else FALSE + + if (check_sdy && check_ab) { + # check that summary index compatible with FCSs in collection - by number of files == index nb + if (ct_files != length(df$antibodies)) { + quit(save = "no", status = 12, runLast = FALSE) + } + } else { + quit(save = "no", status = 13, runLast = FALSE) + } + + if (some_pb) { + quit(save = "no", status = 10, runLast = FALSE) + } else { + write.table(clusters, list_clusters, quote = F, row.names = F, col.names = F) + search_cluster_panels(df, fcspaths, fcsnames, outdir, list_unused, clusters) + } +} + +################################################################################ +################################################################################ +args <- commandArgs(trailingOnly = TRUE) + +i <- grep(args, pattern = "FCS_FILES") + +cluster_def <- vector() +cl_df <- args[3] +if (i > 6) { + ii <- i - 1 + more_cl <- args[6:ii] + cl_df <- c(cl_df, more_cl) +} +cluster_def <- sapply(cl_df, check_cluster_def) + +fcs_files <- list() +fcs_names <- list() +j <- 1 +m <- i + 1 +tmp_fcs <- args[m:length(args)] + +for (k in seq_len(length(tmp_fcs))) { + if (k %% 2) { + fcs_files[[j]] <- tmp_fcs[[k]] + fcs_names[[j]] <- tmp_fcs[[k + 1]] + j <- j + 1 + } +} + +check_input(args[1], args[2], args[4], args[5], fcs_files, fcs_names, + cluster_def)