Mercurial > repos > azomics > metacyto_search_cl
changeset 0:94ac403d134a draft default tip
"planemo upload for repository https://github.com/AstraZeneca-Omics/immport-galaxy-tools/tree/master/flowtools/metacyto_search_clr commit a1b796a09f6b30919a73b5ded0ce5a6378317007"
| author | azomics | 
|---|---|
| date | Wed, 28 Jul 2021 22:02:38 +0000 | 
| parents | |
| children | |
| files | metacyto_search_clr.R metacyto_search_clr.xml test-data/Group1.fcs test-data/Group2.fcs test-data/preprocess.metacyto_summary.txt | 
| diffstat | 5 files changed, 310 insertions(+), 0 deletions(-) [+] | 
line wrap: on
 line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metacyto_search_clr.R Wed Jul 28 22:02:38 2021 +0000 @@ -0,0 +1,191 @@ +#!/usr/bin/env Rscript +###################################################################### +# Copyright (c) 2018 Northrop Grumman. +# All rights reserved. +###################################################################### +# +# Version 1 - January 2018 +# Author: Cristel Thomas +# +# + +library(flowCore) +library(MetaCyto) + +check_cluster_def <- function(cl_def) { + if (cl_def == "" || cl_def == "None") { + quit(save = "no", status = 14, runLast = FALSE) + } else { + tmp <- gsub(" ", "", cl_def, fixed = TRUE) + clean_def <- gsub(",", "|", tmp, fixed = TRUE) + return(toupper(clean_def)) + } +} + +path_to_group_file <- function(path_to_result) { + grp <- basename(dirname(path_to_result)) + return(paste(grp, "fcs", sep = ".", collapse = NULL)) +} + +group_file_to_group_name <- function(result_file) { + return(strsplit(result_file, ".", fixed = TRUE)[[1]][1]) +} + + +search_cluster_panels <- function(df, fcspaths, fcsnames, outdir="", uc="", + clusters=vector()) { + + working_dir <- "tmp_metacyto" + working_out <- "tmp_metacyto_out" + dir.create(working_dir) + dir.create(outdir) + + # reformat summary -- expects csv + 'fcs_names' && 'fcs_files' + new_df <- file.path(working_dir, "processed_sample_summary.csv") + df$fcs_names <- df$filenames + df$fcs_files <- df$filenames + write.csv(df, file = new_df, row.names = F) + + # move && rename FCS files to same directory + for (i in seq_len(length(fcspaths))) { + new_file <- file.path(working_dir, fcsnames[[i]]) + file.copy(fcspaths[[i]], new_file) + } + + searchCluster.batch(preprocessOutputFolder = working_dir, + outpath = working_out, + clusterLabel = clusters) + + result_files <- list.files(working_out, + pattern = "cluster_stats_in_each_sample", + recursive = T, + full.names = T) + + nb_groups <- length(fcsnames) + no_results <- vector() + if (length(result_files) != nb_groups) { + groups_with_results <- sapply(result_files, path_to_group_file) + ## one or more groups with no results, figure out which + no_results <- setdiff(fcsnames, groups_with_results) + } + + if (length(no_results) == nb_groups) { + sink(uc) + cat("No clusters were found in none of the groups.") + sink() + } else { + unused_clrs <- list() + + if (length(no_results > 0)) { + grp_no_results <- sapply(no_results, group_file_to_group_name) + unused_clrs <- data.frame("cluster_label" = "any", "not_found_in" = grp_no_results) + } + + for (result in result_files) { + group_name <- strsplit(result, .Platform$file.sep)[[1]][2] + new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_") + new_path <- file.path(outdir, new_filename) + tmp_df <- read.csv(result) + + used_clr <- as.character(unique(tmp_df$label)) + if (length(used_clr) != length(clusters)) { + unused <- setdiff(clusters, used_clr) + tmp_udf <- data.frame("cluster_label" = unused, "not_found_in" = group_name) + unused_clrs <- rbind(unused_clrs, tmp_udf) + } + colnames(tmp_df)[[1]] <- "group_name" + write.table(tmp_df, new_path, quote = F, row.names = F, col.names = T, sep = "\t") + } + + if (is.null(dim(unused_clrs))) { + sink(uc) + cat("All provided cluster definition were found in all provided FCS files.") + sink() + } else { + write.table(unused_clrs, uc, quote = F, row.names = F, col.names = T, sep = "\t") + } + } +} + + +check_input <- function(report="", outdir="", list_unused="", list_clusters="", + fcs_files=list(), grp_names=list(), clusters=vector()) { + # check FCS files + fcspaths <- unlist(fcs_files) + fcsnames <- unlist(grp_names) + ct_files <- 0 + some_pb <- FALSE + for (i in seq_len(length(fcspaths))) { + is_file_valid <- FALSE + tryCatch({ + fcs <- read.FCS(fcspaths[[i]], transformation = FALSE) + is_file_valid <- TRUE + }, error = function(ex) { + print(paste("File is not a valid FCS file:", fcsnames[[i]], ex)) + }) + if (is_file_valid) { + metacyto_pp_check <- if ("sample_id" %in% colnames(fcs)) TRUE else FALSE + if (metacyto_pp_check) { + idx <- length(colnames(fcs)) + ct_files <- ct_files + max(fcs@exprs[, idx]) + } else { + quit(save = "no", status = 11, runLast = FALSE) + } + } else { + some_pb <- TRUE + } + } + # check summary file format + df <- read.table(report, sep = "\t", header = T, colClasses = "character") + nm <- colnames(df) + check_ab <- if ("antibodies" %in% nm) TRUE else FALSE + check_sdy <- if ("study_id" %in% nm) TRUE else FALSE + + if (check_sdy && check_ab) { + # check that summary index compatible with FCSs in collection - by number of files == index nb + if (ct_files != length(df$antibodies)) { + quit(save = "no", status = 12, runLast = FALSE) + } + } else { + quit(save = "no", status = 13, runLast = FALSE) + } + + if (some_pb) { + quit(save = "no", status = 10, runLast = FALSE) + } else { + write.table(clusters, list_clusters, quote = F, row.names = F, col.names = F) + search_cluster_panels(df, fcspaths, fcsnames, outdir, list_unused, clusters) + } +} + +################################################################################ +################################################################################ +args <- commandArgs(trailingOnly = TRUE) + +i <- grep(args, pattern = "FCS_FILES") + +cluster_def <- vector() +cl_df <- args[3] +if (i > 6) { + ii <- i - 1 + more_cl <- args[6:ii] + cl_df <- c(cl_df, more_cl) +} +cluster_def <- sapply(cl_df, check_cluster_def) + +fcs_files <- list() +fcs_names <- list() +j <- 1 +m <- i + 1 +tmp_fcs <- args[m:length(args)] + +for (k in seq_len(length(tmp_fcs))) { + if (k %% 2) { + fcs_files[[j]] <- tmp_fcs[[k]] + fcs_names[[j]] <- tmp_fcs[[k + 1]] + j <- j + 1 + } +} + +check_input(args[1], args[2], args[4], args[5], fcs_files, fcs_names, + cluster_def)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/metacyto_search_clr.xml Wed Jul 28 22:02:38 2021 +0000 @@ -0,0 +1,108 @@ +<tool id="metacyto_search_clr" name="Guided identification of clusters" version="1.0+galaxy0" profile="18.01"> + <description>using MetaCyto</description> + <requirements> + <requirement type="package" version="1.4.0">bioconductor-metacyto</requirement> + </requirements> + <stdio> + <exit_code range="1:9" /> + <exit_code range="10" level="fatal" description="Please provide valid input FCS files." /> + <exit_code range="11" level="fatal" description="Please provide FCS files pre-processed for MetaCyto." /> + <exit_code range="12" level="fatal" description="Pre-processing summary doesn't match the set of FCS files." /> + <exit_code range="13" level="fatal" description="The pre-processing summary is in the wrong format." /> + <exit_code range="14" level="fatal" description="Please provide a cluster definition" /> + <exit_code range="15:" /> + </stdio> + <command><![CDATA[ + Rscript --slave --vanilla '$__tool_directory__/metacyto_search_clr.R' '${summary}' 'fcs_stats' '${first_def}' '${unused}' '${cluster_list}' + #for $r in $cl_df + '${r.cluster_def}' + #end for + 'FCS_FILES' + #for $f in $group + '${f}' '${f.name}' + #end for + ]]> + </command> + <inputs> + <param format="metacyto_summary.txt" name="summary" type="data" label="MetaCyto preprocessing summary"/> + <param format="fcs" name="group" type="data_collection" collection_type="list" label="FCS files Collection pre-processed for MetaCyto"/> + <param name="first_def" type="text" label="Cluster definition" help="i.e.:CD3+,CD4-,CD8+,CCR7+"/> + <repeat name="cl_df" title="Cluster"> + <param name="cluster_def" type="text" label="Additional cluster definition" help="i.e.:CD3+,CD4-,CD8+,CCR7+"/> + </repeat> + </inputs> + <outputs> + <data format="tabular" name="unused" label="List of clusters not found in all files from ${group.name}"/> + <data format="metacyto_clr.txt" name="cluster_list" label="List of clusters from guided clustering analysis of ${group.name}"/> + <collection type="list" label="Guided cluster identification analysis in ${group.name}" name="output"> + <discover_datasets pattern="(?P<name>.*)" directory="fcs_stats" format="metacyto_stats.txt" /> + </collection> + </outputs> + <tests> + <test> + <param name="summary" value="preprocess.metacyto_summary.txt"/> + <param name="group"> + <collection type="list"> + <element name="Group1" value="Group1.fcs"/> + <element name="Group2" value="Group2.fcs"/> + </collection> + </param> + <param name="first_def" value="CD3+,CD4-,CD8+" /> + <repeat name="cl_df"> + <param name="cluster_def" value="CD8+,CCR7+" /> + </repeat> + <output name="unused" > + <assert_contents> + <has_n_lines n="2"/> + </assert_contents> + </output> + <output_collection name="output"> + <element name="Group2_cluster_stats.txt"> + <assert_contents> + <has_n_lines n="9"/> + </assert_contents> + </element> + </output_collection> + </test> + </tests> + <help><![CDATA[ +This tool uses MetaCyto to search for clusters of cells corresponding to provided gate definitions +-------------------------------------------------------------------------------------------------- + +**Input files** + +This tool requires the pre-processing summary generated for MetaCyto as well as the pre-processed FCS files. + +*Cluster definitions* + +Please provide gate definitions as comma-separated lists of marker names, for instance "CD3+, CD4+, CD25+, Foxp3+". + +**Output** + +This tool generates a list of provided clusters definitions, as well as a table of the MFI for each marker in each cluster in each file, and proportion of each cluster in each file. A list of unused cluster definitions, if any, is also generated. + +**Example** + +*Input* - Pre-Processing Summary Table + +======== =========================== ========= +study_id antibodies filenames +======== =========================== ========= +group1 Marker1|Marker2|Marker3|... file1.fcs +group2 Marker1|Marker2|Marker3|... file2.fcs +... ... ... +======== =========================== ========= + + +*Output* - Clustering Summary Tables + +========== ================ ========== ========================== ================ ======= ======= === ======== +group_name fcs_files cluster_id label fcs_names Marker1 Marker2 ... fraction +========== ================ ========== ========================== ================ ======= ======= === ======== +group1 file1_group1.fcs cluster1 Marker1+|Marker2+|Marker3+ file1_group1.fcs 1.9815 0.2024 ... 0.373 +group1 file2_group1.fcs cluster1 Marker1+|Marker2+|Marker3+ file2_group1.fcs 2.3739 0.3377 ... 0.26 + ... ... ... ... ... ... ... ... ... +========== ================ ========== ========================== ================ ======= ======= === ======== + ]]> + </help> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/preprocess.metacyto_summary.txt Wed Jul 28 22:02:38 2021 +0000 @@ -0,0 +1,11 @@ +study_id antibodies filenames +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow6.fcs +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow5.fcs +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow4.fcs +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow3.fcs +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow2.fcs +Group1 FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID inputflow1.fcs +Group2 TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID inputcytof4.fcs +Group2 TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID inputcytof3.fcs +Group2 TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID inputcytof2.fcs +Group2 TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID inputcytof1.fcs
