Mercurial > repos > azomics > metacyto_search_cl

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metacyto_search_clr.R	Wed Jul 28 22:02:38 2021 +0000
@@ -0,0 +1,191 @@
+#!/usr/bin/env Rscript
+######################################################################
+#                  Copyright (c) 2018 Northrop Grumman.
+#                          All rights reserved.
+######################################################################
+#
+# Version 1 - January 2018
+# Author: Cristel Thomas
+#
+#
+
+library(flowCore)
+library(MetaCyto)
+
+check_cluster_def <- function(cl_def) {
+  if (cl_def == "" || cl_def == "None") {
+    quit(save = "no", status = 14, runLast = FALSE)
+  } else {
+    tmp <- gsub(" ", "", cl_def, fixed = TRUE)
+    clean_def <- gsub(",", "|", tmp, fixed = TRUE)
+    return(toupper(clean_def))
+  }
+}
+
+path_to_group_file <- function(path_to_result) {
+  grp <- basename(dirname(path_to_result))
+  return(paste(grp, "fcs", sep = ".", collapse = NULL))
+}
+
+group_file_to_group_name <- function(result_file) {
+  return(strsplit(result_file, ".", fixed = TRUE)[[1]][1])
+}
+
+
+search_cluster_panels <- function(df, fcspaths, fcsnames, outdir="", uc="",
+                              clusters=vector()) {
+
+  working_dir <- "tmp_metacyto"
+  working_out <- "tmp_metacyto_out"
+  dir.create(working_dir)
+  dir.create(outdir)
+
+  # reformat summary -- expects csv + 'fcs_names' && 'fcs_files'
+  new_df <- file.path(working_dir, "processed_sample_summary.csv")
+  df$fcs_names <- df$filenames
+  df$fcs_files <- df$filenames
+  write.csv(df, file = new_df, row.names = F)
+
+  # move && rename FCS files to same directory
+  for (i in seq_len(length(fcspaths))) {
+    new_file <- file.path(working_dir, fcsnames[[i]])
+    file.copy(fcspaths[[i]], new_file)
+  }
+
+  searchCluster.batch(preprocessOutputFolder = working_dir,
+                      outpath = working_out,
+                      clusterLabel = clusters)
+
+  result_files <- list.files(working_out,
+                             pattern = "cluster_stats_in_each_sample",
+                             recursive = T,
+                             full.names = T)
+
+  nb_groups <- length(fcsnames)
+  no_results <- vector()
+  if (length(result_files) != nb_groups) {
+   groups_with_results <- sapply(result_files, path_to_group_file)
+   ## one or more groups with no results, figure out which
+   no_results <- setdiff(fcsnames, groups_with_results)
+  }
+
+  if (length(no_results) == nb_groups) {
+    sink(uc)
+    cat("No clusters were found in none of the groups.")
+    sink()
+  } else {
+    unused_clrs <- list()
+
+    if (length(no_results > 0)) {
+      grp_no_results <- sapply(no_results, group_file_to_group_name)
+      unused_clrs <- data.frame("cluster_label" = "any", "not_found_in" = grp_no_results)
+    }
+
+    for (result in result_files) {
+      group_name <- strsplit(result, .Platform$file.sep)[[1]][2]
+      new_filename <- paste(c(group_name, "cluster_stats.txt"), collapse = "_")
+      new_path <- file.path(outdir, new_filename)
+      tmp_df <- read.csv(result)
+
+      used_clr <- as.character(unique(tmp_df$label))
+      if (length(used_clr) != length(clusters)) {
+        unused <- setdiff(clusters, used_clr)
+        tmp_udf <- data.frame("cluster_label" = unused, "not_found_in" = group_name)
+        unused_clrs <- rbind(unused_clrs, tmp_udf)
+      }
+      colnames(tmp_df)[[1]] <- "group_name"
+      write.table(tmp_df, new_path, quote = F, row.names = F, col.names = T, sep = "\t")
+    }
+
+    if (is.null(dim(unused_clrs))) {
+      sink(uc)
+      cat("All provided cluster definition were found in all provided FCS files.")
+      sink()
+    } else {
+      write.table(unused_clrs, uc, quote = F, row.names = F, col.names = T, sep = "\t")
+    }
+  }
+}
+
+
+check_input <- function(report="", outdir="", list_unused="", list_clusters="",
+                       fcs_files=list(), grp_names=list(), clusters=vector()) {
+  # check FCS files
+  fcspaths <- unlist(fcs_files)
+  fcsnames <- unlist(grp_names)
+  ct_files <- 0
+  some_pb <- FALSE
+  for (i in seq_len(length(fcspaths))) {
+    is_file_valid <- FALSE
+    tryCatch({
+      fcs <- read.FCS(fcspaths[[i]], transformation = FALSE)
+      is_file_valid <- TRUE
+    }, error = function(ex) {
+      print(paste("File is not a valid FCS file:", fcsnames[[i]], ex))
+    })
+    if (is_file_valid) {
+      metacyto_pp_check <- if ("sample_id" %in% colnames(fcs)) TRUE else FALSE
+      if (metacyto_pp_check) {
+        idx <- length(colnames(fcs))
+        ct_files <- ct_files + max(fcs@exprs[, idx])
+      } else {
+        quit(save = "no", status = 11, runLast = FALSE)
+      }
+    } else {
+      some_pb <- TRUE
+    }
+  }
+  # check summary file format
+  df <- read.table(report, sep = "\t", header = T, colClasses = "character")
+  nm <- colnames(df)
+  check_ab <- if ("antibodies" %in% nm) TRUE else FALSE
+  check_sdy <- if ("study_id" %in% nm) TRUE else FALSE
+
+  if (check_sdy && check_ab) {
+    # check that summary index compatible with FCSs in collection - by number of files == index nb
+    if (ct_files != length(df$antibodies)) {
+      quit(save = "no", status = 12, runLast = FALSE)
+    }
+  } else {
+    quit(save = "no", status = 13, runLast = FALSE)
+  }
+
+  if (some_pb) {
+    quit(save = "no", status = 10, runLast = FALSE)
+  } else {
+    write.table(clusters, list_clusters, quote = F, row.names = F, col.names = F)
+    search_cluster_panels(df, fcspaths, fcsnames, outdir, list_unused, clusters)
+  }
+}
+
+################################################################################
+################################################################################
+args <- commandArgs(trailingOnly = TRUE)
+
+i <- grep(args, pattern = "FCS_FILES")
+
+cluster_def <- vector()
+cl_df <- args[3]
+if (i > 6) {
+  ii <- i - 1
+  more_cl <- args[6:ii]
+  cl_df <- c(cl_df, more_cl)
+}
+cluster_def <- sapply(cl_df, check_cluster_def)
+
+fcs_files <- list()
+fcs_names <- list()
+j <- 1
+m <- i + 1
+tmp_fcs <- args[m:length(args)]
+
+for (k in seq_len(length(tmp_fcs))) {
+  if (k %% 2) {
+    fcs_files[[j]] <- tmp_fcs[[k]]
+    fcs_names[[j]] <- tmp_fcs[[k + 1]]
+    j <- j + 1
+  }
+}
+
+check_input(args[1], args[2], args[4], args[5], fcs_files, fcs_names,
+           cluster_def)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metacyto_search_clr.xml	Wed Jul 28 22:02:38 2021 +0000
@@ -0,0 +1,108 @@
+<tool id="metacyto_search_clr" name="Guided identification of clusters" version="1.0+galaxy0" profile="18.01">
+  <description>using MetaCyto</description>
+  <requirements>
+    <requirement type="package" version="1.4.0">bioconductor-metacyto</requirement>
+  </requirements>
+  <stdio>
+    <exit_code range="1:9" />
+    <exit_code range="10" level="fatal" description="Please provide valid input FCS files." />
+    <exit_code range="11" level="fatal" description="Please provide FCS files pre-processed for MetaCyto." />
+    <exit_code range="12" level="fatal" description="Pre-processing summary doesn't match the set of FCS files." />
+    <exit_code range="13" level="fatal" description="The pre-processing summary is in the wrong format." />
+    <exit_code range="14" level="fatal" description="Please provide a cluster definition" />
+    <exit_code range="15:" />
+  </stdio>
+  <command><![CDATA[
+    Rscript --slave --vanilla '$__tool_directory__/metacyto_search_clr.R' '${summary}' 'fcs_stats' '${first_def}' '${unused}' '${cluster_list}'
+  #for $r in $cl_df
+    '${r.cluster_def}'
+  #end for
+  'FCS_FILES'
+  #for $f in $group
+    '${f}' '${f.name}'
+  #end for
+  ]]>
+  </command>
+  <inputs>
+    <param format="metacyto_summary.txt" name="summary" type="data" label="MetaCyto preprocessing summary"/>
+    <param format="fcs" name="group" type="data_collection" collection_type="list" label="FCS files Collection pre-processed for MetaCyto"/>
+    <param name="first_def" type="text" label="Cluster definition" help="i.e.:CD3+,CD4-,CD8+,CCR7+"/>
+    <repeat name="cl_df" title="Cluster">
+      <param name="cluster_def" type="text" label="Additional cluster definition" help="i.e.:CD3+,CD4-,CD8+,CCR7+"/>
+    </repeat>
+  </inputs>
+  <outputs>
+    <data format="tabular" name="unused" label="List of clusters not found in all files from ${group.name}"/>
+    <data format="metacyto_clr.txt" name="cluster_list" label="List of clusters from guided clustering analysis of ${group.name}"/>
+    <collection type="list" label="Guided cluster identification analysis in ${group.name}" name="output">
+      <discover_datasets pattern="(?P&lt;name&gt;.*)" directory="fcs_stats" format="metacyto_stats.txt" />
+    </collection>
+  </outputs>
+  <tests>
+    <test>
+      <param name="summary" value="preprocess.metacyto_summary.txt"/>
+      <param name="group">
+        <collection type="list">
+          <element name="Group1" value="Group1.fcs"/>
+          <element name="Group2" value="Group2.fcs"/>
+        </collection>
+      </param>
+      <param name="first_def" value="CD3+,CD4-,CD8+" />
+      <repeat name="cl_df">
+        <param name="cluster_def" value="CD8+,CCR7+" />
+      </repeat>
+      <output name="unused" >
+        <assert_contents>
+          <has_n_lines n="2"/>
+        </assert_contents>
+      </output>
+      <output_collection name="output">
+        <element name="Group2_cluster_stats.txt">
+          <assert_contents>
+            <has_n_lines n="9"/>
+          </assert_contents>
+        </element>
+      </output_collection>
+    </test>
+  </tests>
+  <help><![CDATA[
+This tool uses MetaCyto to search for clusters of cells corresponding to provided gate definitions
+--------------------------------------------------------------------------------------------------
+
+**Input files**
+
+This tool requires the pre-processing summary generated for MetaCyto as well as the pre-processed FCS files.
+
+*Cluster definitions*
+
+Please provide gate definitions as comma-separated lists of marker names, for instance "CD3+, CD4+, CD25+, Foxp3+".
+
+**Output**
+
+This tool generates a list of provided clusters definitions, as well as a table of the MFI for each marker in each cluster in each file, and proportion of each cluster in each file. A list of unused cluster definitions, if any, is also generated.
+
+**Example**
+
+*Input* - Pre-Processing Summary Table
+
+========  ===========================  =========
+study_id  antibodies                   filenames
+========  ===========================  =========
+group1    Marker1|Marker2|Marker3|...  file1.fcs
+group2    Marker1|Marker2|Marker3|...  file2.fcs
+...       ...                          ...
+========  ===========================  =========
+
+
+*Output* - Clustering Summary Tables
+
+========== ================ ========== ========================== ================ ======= ======= === ========
+group_name fcs_files        cluster_id label                      fcs_names        Marker1 Marker2 ... fraction
+========== ================ ========== ========================== ================ ======= ======= === ========
+group1     file1_group1.fcs cluster1   Marker1+|Marker2+|Marker3+ file1_group1.fcs 1.9815  0.2024  ... 0.373
+group1     file2_group1.fcs cluster1   Marker1+|Marker2+|Marker3+ file2_group1.fcs 2.3739  0.3377  ... 0.26
+   ...        ...              ...        ...                        ...              ...     ...  ... ...
+========== ================ ========== ========================== ================ ======= ======= === ========
+  ]]>
+  </help>
+</tool>
Binary file test-data/Group1.fcs has changed
Binary file test-data/Group2.fcs has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/preprocess.metacyto_summary.txt	Wed Jul 28 22:02:38 2021 +0000
@@ -0,0 +1,11 @@
+study_id	antibodies	filenames
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow6.fcs
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow5.fcs
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow4.fcs
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow3.fcs
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow2.fcs
+Group1	FSC-A|SSC-A|CD11A|CD4|CD16|CD28|CD95|CD3|CD62L|CD8B|CCR7|TIME|SAMPLE_ID	inputflow1.fcs
+Group2	TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID	inputcytof4.fcs
+Group2	TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID	inputcytof3.fcs
+Group2	TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID	inputcytof2.fcs
+Group2	TIME|CELL_LENGTH|DEAD|CD19|CD4|CD8|IGD|CD85J|CD16|CD3|CD38|CD27|CD14|CD94|CCR7|CD45RA|CD20|CD127|CD33|CD28|CD24|CD161|TCRGD|CD56|HLADR|CD25|DNA1|DNA2|SAMPLE_ID	inputcytof1.fcs