Mercurial > repos > azomics > flowsom_cross_comp
changeset 1:a1054bd1060a draft
planemo upload for repository https://github.com/ImmPortDB/immport-galaxy-tools/tree/master/flowtools/flowsom_cross_comp commit f2ed3cfcc71ccf61cf6058533d217443318fd024
author | azomics |
---|---|
date | Mon, 25 Sep 2023 22:04:40 +0000 |
parents | e796ed5dfd02 |
children | f611ded6c6df |
files | FlowSOMMApIndividualFCS.R FlowSOMMApIndividualFCS.xml |
diffstat | 2 files changed, 150 insertions(+), 121 deletions(-) [+] |
line wrap: on
line diff
--- a/FlowSOMMApIndividualFCS.R Tue Jun 23 19:49:33 2020 -0400 +++ b/FlowSOMMApIndividualFCS.R Mon Sep 25 22:04:40 2023 +0000 @@ -16,197 +16,224 @@ ## geometric mean from # https://stackoverflow.com/questions/2602583/geometric-mean-is-there-a-built-in -gm_mean = function(x, na.rm=TRUE){ - exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) +gm_mean <- function(x, na_rm = TRUE) { + exp(sum(log(x[x > 0]), na.rm = na_rm) / length(x)) } -prettyMarkerNames <- function(flowFrame){ - n <- flowFrame@parameters@data[, "name"] - d <- flowFrame@parameters@data[, "desc"] +pretty_marker_names <- function(flow_frame) { + n <- flow_frame@parameters@data[, "name"] + d <- flow_frame@parameters@data[, "desc"] d[is.na(d)] <- n[is.na(d)] - prettyNames <-list() - if(any(grepl("#",d))){ - # Support for hashtag notation: - # antibody#fluorochrome -> antibody (fluorochrome) - prettyNames <- gsub("#(.*)$"," (\\1)",d) + pretty_names <- list() + if (any(grepl("#", d))) { + # Support for hashtag notation: + pretty_names <- gsub("#(.*)$", " (\\1)", d) } else { - prettyNames <- paste(d, " <", n, ">", sep="") + pretty_names <- paste(d, " <", n, ">", sep = "") } - return(prettyNames) + return(pretty_names) } -checkMarkers <- function(fcsfiles, flag_ff=FALSE){ - markerCheck <- T - for (i in 1:length(fcsfiles)){ - if (flag_ff){ +check_markers <- function(fcsfiles, flag_ff = FALSE) { + marker_check <- TRUE + for (i in seq_along(fcsfiles)) { + if (flag_ff) { fcs <- readRDS(fcsfiles[i]) } else { - fcs <- read.FCS(fcsfiles[i], transformation=FALSE) + fcs <- read.FCS(fcsfiles[i], transformation = FALSE) } if (i == 1) { m1 <- as.vector(pData(parameters(fcs))$desc) } else { m2 <- as.vector(pData(parameters(fcs))$desc) - if (is.na(all(m1==m2))) { + if (is.na(all(m1 == m2))) { mm1 <- is.na(m1) mm2 <- is.na(m2) - if (all(mm1==mm2)){ - if (!all(m1==m2, na.rm=TRUE)){ - markerCheck <- F + if (all(mm1 == mm2)) { + if (!all(m1 == m2, na.rm = TRUE)) { + marker_check <- FALSE } } else { - markerCheck <- F + marker_check <- FALSE } - } else if (!all(m1==m2)) { - markerCheck <- F + } else if (!all(m1 == m2)) { + marker_check <- FALSE } } } - if (!markerCheck) { + if (!marker_check) { quit(save = "no", status = 13, runLast = FALSE) } } -mapToTree <- function(filenames, filepaths, flag_ff=FALSE, reftree, cluster=10, - outdir="",flag_meta=FALSE, mfi='mfi', stat1="", stat2="", - stat3="", plot="", mplot="") { +map_to_tree <- function(filenames, filepaths, + flag_ff = FALSE, reftree, cluster = 10, + outdir = "", flag_meta = FALSE, + mfi = "mfi", stat1 = "", stat2 = "", + stat3 = "", plot = "", mplot = "") { - checkMarkers(filepaths, flag_ff) - + check_markers(filepaths, flag_ff) # get tree fst <- readRDS(reftree) plots <- FALSE mplots <- FALSE dir.create(outdir) - if (!plot==""){ + if (!plot == "") { dir.create(plot) plots <- TRUE } - if (!mplot==""){ + if (!mplot == "") { dir.create(mplot) mplots <- TRUE } - metaC <- metaClustering_consensus(fst$map$codes, k=cluster, seed=33) - nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[,1]) + meta_c <- metaClustering_consensus(fst$map$codes, k = cluster, seed = 33) + nb_pop <- if (flag_meta) cluster else max(fst$map$mapping[, 1]) nb_samples <- length(filepaths) nb_marker <- length(fst$prettyColnames) - print_markers <- gsub(' <.*>','',fst$prettyColnames) + print_markers <- gsub(" <.*>", "", fst$prettyColnames) print_markers_ff <- append(print_markers, "Population") - m_stat1 <- matrix(0, nrow=nb_samples, ncol=(nb_pop+2)) + m_stat1 <- matrix(0, nrow = nb_samples, ncol = (nb_pop + 2)) colnames(m_stat1) <- c("FileID", "SampleName", seq_len(nb_pop)) sink(stat2) - cat(print_markers, sep="\t") + cat(print_markers, sep = "\t") cat("\tPercentage\tPopulation\tSampleName\n") sink() col_m3 <- c("Population") for (m in print_markers){ - m1 <- paste(m, "mean", sep="_") - m2 <- paste(m, "median", sep="_") - m3 <- paste(m, "stdev", sep="_") - col_m3 <- append(col_m3, c(m1,m2,m3)) + m1 <- paste(m, "mean", sep = "_") + m2 <- paste(m, "median", sep = "_") + m3 <- paste(m, "stdev", sep = "_") + col_m3 <- append(col_m3, c(m1, m2, m3)) } - col_stat3 <- c(col_m3, "Percentage_mean","Percentage_median","Percentage_stdev") + col_stat3 <- c(col_m3, + "Percentage_mean", + "Percentage_median", + "Percentage_stdev") - for (i in 1:length(filepaths)){ - if (flag_ff){ + for (i in seq_along(filepaths)) { + if (flag_ff) { ff <- readRDS(filepaths[i]) } else { - ff <- read.FCS(filepaths[i], transformation=FALSE) + ff <- read.FCS(filepaths[i], transformation = FALSE) } - if (i==1) { + if (i == 1) { # compare to tree markers - pm <- prettyMarkerNames(ff) - if (!all(fst$prettyColnames %in% pm)){ + pm <- pretty_marker_names(ff) + if (!all(fst$prettyColnames %in% pm)) { quit(save = "no", status = 14, runLast = FALSE) } } fsom <- NewData(fst, ff) - if (mplots){ + if (mplots) { markers <- colnames(ff) - tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep="_") - pdf(file.path(mplot,tmpmplot), useDingbats=FALSE, onefile=TRUE) + tmpmplot <- paste(filenames[i], "marker_plots.pdf", sep = "_") + pdf(file.path(mplot, tmpmplot), useDingbats = FALSE, onefile = TRUE) for (marker in markers){ PlotMarker(fsom, marker) } dev.off() } - if (!plot==""){ - plotpath <- paste(filenames[i], "tree.png", sep="_") - png(file.path(plot, plotpath), type="cairo", height=800, width=800) - PlotStars(fsom, backgroundValues = as.factor(metaC)) + if (!plot == "") { + plotpath <- paste(filenames[i], "tree.png", sep = "_") + png(file.path(plot, plotpath), type = "cairo", height = 800, width = 800) + PlotStars(fsom, backgroundValues = as.factor(meta_c)) dev.off() } - m <- matrix(0,nrow=nrow(ff),ncol=1) + m <- matrix(0, nrow = nrow(ff), ncol = 1) s <- seq_len(nrow(ff)) - if (flag_meta){ - m[s,] <- metaC[fsom$map$mapping[,1]] + if (flag_meta) { + m[s, ] <- meta_c[fsom$map$mapping[, 1]] } else { - m[s,] <- fsom$map$mapping[,1] + m[s, ] <- fsom$map$mapping[, 1] } colnames(m) <- "FlowSOM" - ff <- cbind2(ff,m) + ff <- cbind2(ff, m) out <- exprs(ff) colnames(out) <- print_markers_ff - clr_table <- paste(filenames[i], "clustered.flowclr", sep="_") - write.table(out, file=file.path(outdir, clr_table), quote=F, row.names=F, col.names=T, sep='\t', - append=F) + clr_table <- paste(filenames[i], "clustered.flowclr", sep = "_") + write.table(out, + file = file.path(outdir, clr_table), + quote = FALSE, + row.names = FALSE, + col.names = TRUE, + sep = "\t", + append = FALSE) - cluster_count <- table(out[,"Population"]) - cluster_prop <- prop.table(cluster_count)*100 + cluster_count <- table(out[, "Population"]) + cluster_prop <- prop.table(cluster_count) * 100 m1_tmp <- numeric(nb_pop) for (j in 1:nb_pop){ - if (as.character(j) %in% names(cluster_count)){ - m1_tmp[[j]] <- format(round(cluster_prop[[as.character(j)]], 2), nsmall=2) + if (as.character(j) %in% names(cluster_count)) { + m1_tmp[[j]] <- format( + round(cluster_prop[[as.character(j)]], 2), + nsmall = 2) } } - samplename <- paste("Sample", i, sep="") - m_stat1[i,] <- c(filenames[i], samplename, m1_tmp) + samplename <- paste("Sample", i, sep = "") + m_stat1[i, ] <- c(filenames[i], samplename, m1_tmp) # flowstat2 # Marker1 Marker2 Marker3 ... Population Percentage SampleName # MFIs for each marker # dimension ==> col = nb of markers + 3; row = nb of files * nb of clusters - if (mfi=="mfi"){ - m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), mean) - } else if (mfi=="mdfi") { - m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), median) + if (mfi == "mfi") { + m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), mean) + } else if (mfi == "mdfi") { + m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), median) } else { - m2 <- aggregate(out[,1:nb_marker], list(out[,nb_marker+1]), gm_mean) + m2 <- aggregate(out[, 1:nb_marker], list(out[, nb_marker + 1]), gm_mean) } m2["Percentage"] <- as.character(cluster_prop) m2["Population"] <- as.character(m2$Group.1) m2["SampleName"] <- samplename m2t <- as.matrix(m2[2:length(m2)]) - write.table(m2t, file=stat2, quote=F, row.names=F, col.names=F, sep='\t', - append=T) + write.table(m2t, + file = stat2, + quote = FALSE, + row.names = FALSE, + col.names = FALSE, + sep = "\t", + append = TRUE) } - write.table(m_stat1, file=stat1, quote=F, row.names=F, col.names=T, sep='\t', - append=F) + write.table(m_stat1, + file = stat1, + quote = FALSE, + row.names = FALSE, + col.names = TRUE, + sep = "\t", + append = FALSE) - m2df <- read.table(stat2, sep="\t", header=T) - ag <- aggregate(m2df[,0:nb_marker+1], list(m2df[,nb_marker+2]), function(x) c(mn=mean(x), med=median(x), stdv=sd(x))) + m2df <- read.table(stat2, sep = "\t", header = TRUE) + ag <- aggregate(m2df[, 0:nb_marker + 1], + list(m2df[, nb_marker + 2]), + function(x) c(mn = mean(x), med = median(x), stdv = sd(x))) m3t <- as.matrix(ag) colnames(m3t) <- col_stat3 - write.table(m3t, file=stat3, quote=F, row.names=F, col.names=T, sep='\t', - append=F) + write.table(m3t, file = stat3, + quote = FALSE, + row.names = FALSE, + col.names = TRUE, sep = "\t", + append = FALSE) } -flowFrameOrFCS <- function(filenames, filepaths, reftree, cluster=10, outdir="", - flag_meta=FALSE, mfi='mfi', stat1="", stat2="", - stat3="", plot="", mplot=""){ - - isValid <- F - is_fcs <- F - is_ff <- F +flow_frame_or_fcs <- function(filenames, + filepaths, + reftree, + cluster = 10, outdir = "", + flag_meta = FALSE, + mfi = "mfi", stat1 = "", stat2 = "", + stat3 = "", plot = "", mplot = "") { + is_fcs <- FALSE + is_ff <- FALSE flag_ff <- FALSE i <- 0 for (f in filepaths){ @@ -215,10 +242,10 @@ }, error = function(ex) { print(paste(ex)) }) - if (!is_fcs){ + if (!is_fcs) { tryCatch({ ff <- readRDS(f) - is_ff <- T + is_ff <- TRUE }, error = function(ex) { print(paste(ex)) }) @@ -229,13 +256,15 @@ quit(save = "no", status = 10, runLast = FALSE) } } - if (i==0) { + if (i == 0) { flag_ff <- TRUE - } else if (!i==length(filenames)){ + } else if (!i == length(filenames)) { quit(save = "no", status = 12, runLast = FALSE) } - mapToTree(filenames, filepaths, flag_ff, reftree, cluster, outdir, flag_meta, - mfi, stat1, stat2, stat3, plot, mplot) + map_to_tree(filenames, + filepaths, + flag_ff, reftree, cluster, outdir, flag_meta, + mfi, stat1, stat2, stat3, plot, mplot) } args <- commandArgs(trailingOnly = TRUE) @@ -243,35 +272,35 @@ mplot <- "" m <- 8 flag_meta <- FALSE -if (args[4]=='meta') { +if (args[4] == "meta") { flag_meta <- TRUE } -if (args[9] == 'newDataTrees') { - plot <- 'newDataTrees' +if (args[9] == "newDataTrees") { + plot <- "newDataTrees" m <- m + 1 - if (args[10] == 'newDataMarkers') { + if (args[10] == "newDataMarkers") { mplot <- "newDataMarkers" m <- m + 1 } -} else if (args[9] == 'newDataMarkers') { +} else if (args[9] == "newDataMarkers") { mplot <- "newDataMarkers" m <- m + 1 } -n <- m+1 -# files + filenames +n <- m + 1 nb_files <- (length(args) - m) / 2 files1 <- character(nb_files) files2 <- character(nb_files) j <- 1 file_list <- args[n:length(args)] -for (i in 1:length(file_list)) { - if (i%%2){ +for (i in seq_along(file_list)) { + if (i %% 2) { files1[[j]] <- file_list[i] - files2[[j]] <- file_list[i+1] + files2[[j]] <- file_list[i + 1] j <- j + 1 } } -flowFrameOrFCS(files2, files1, args[1], as.numeric(args[3]), args[2], flag_meta, - args[5], args[6], args[7], args[8], plot, mplot) +flow_frame_or_fcs(files2, + files1, args[1], as.numeric(args[3]), args[2], flag_meta, + args[5], args[6], args[7], args[8], plot, mplot)
--- a/FlowSOMMApIndividualFCS.xml Tue Jun 23 19:49:33 2020 -0400 +++ b/FlowSOMMApIndividualFCS.xml Mon Sep 25 22:04:40 2023 +0000 @@ -1,14 +1,14 @@ -<tool id="flowsom_cross_comp" name="Map individual FCS to reference SOM" version="1.0"> +<tool id="flowsom_cross_comp" name="Map individual FCS to reference SOM" version="1.0+galaxy1"> <description>using FlowSOM.</description> <requirements> - <requirement type="package" version="1.8.0">bioconductor-flowsom</requirement> + <requirement type="package" version="1.16.0">bioconductor-flowsom</requirement> </requirements> <stdio> - <exit_code range="10" level="fatal" description="Please provide valid input files (FCS or flowFrame)." /> - <exit_code range="11" level="fatal" description="Please provide a valid reference SOM tree." /> - <exit_code range="12" level="fatal" description="Input files should all be in the same format." /> - <exit_code range="13" level="fatal" description="There are inconsistencies in marker names between FCS files." /> - <exit_code range="14" level="fatal" description="FCS file and reference tree markers are inconsistent." /> + <exit_code range="10" level="fatal" description="Please provide valid input files (FCS or flowFrame)."/> + <exit_code range="11" level="fatal" description="Please provide a valid reference SOM tree."/> + <exit_code range="12" level="fatal" description="Input files should all be in the same format."/> + <exit_code range="13" level="fatal" description="There are inconsistencies in marker names between FCS files."/> + <exit_code range="14" level="fatal" description="FCS file and reference tree markers are inconsistent."/> </stdio> <command><![CDATA[ Rscript $__tool_directory__/FlowSOMMApIndividualFCS.R '${reftree}' 'newDataOutput' '${cluster}' '${tablecontent}' '${mfi}' '${stats}' '${mfistats}' '${allstats}' @@ -30,9 +30,9 @@ <option value="FlowSOM">FlowSOM Clusters</option> <option value="meta">Meta-Clusters</option> </param> - <param name="cluster" type="integer" value="10" optional="true" label="Number of expected metaclusters" help="By default, the number of expected metaclusters is set to 10" /> - <param name="plot" type="boolean" label="Plot SOM Tree?" checked="false" truevalue="TRUE" falsevalue="FALSE" /> - <param name="mplot" type="boolean" label="Plot individual marker trees?" checked="false" truevalue="TRUE" falsevalue="FALSE" /> + <param name="cluster" type="integer" value="10" optional="true" label="Number of expected metaclusters" help="By default, the number of expected metaclusters is set to 10"/> + <param name="plot" type="boolean" label="Plot SOM Tree?" checked="false" truevalue="TRUE" falsevalue="FALSE"/> + <param name="mplot" type="boolean" label="Plot individual marker trees?" checked="false" truevalue="TRUE" falsevalue="FALSE"/> <param name="mfi" type="select" label="Calculate centroids using:"> <option value="mfi" selected="true">Mean Fluorescence Intensity</option> <option value="mdfi">Median Fluorescence Intensity</option> @@ -41,17 +41,17 @@ </inputs> <outputs> <collection type="list" label="Mapping of ${input.name} to ${reftree.name}" name="output"> - <discover_datasets pattern="(?P<name>.*)" directory="newDataOutput" format="flowclr" /> + <discover_datasets pattern="(?P<name>.*)" directory="newDataOutput" format="flowclr"/> </collection> <data format="flowstat1" name="stats" label="${tablecontent} cluster distribution in ${input.name}"/> <data format="flowstat2" name="mfistats" label="${tablecontent} cluster ${mfi} centroids in ${input.name}"/> <data format="flowstat3" name="allstats" label="${tablecontent} cluster descriptive stats of ${input.name}"/> <collection type="list" label="Tree representations of mapping ${input.name} to ${reftree.name}" name="trees"> - <discover_datasets pattern="(?P<name>.*)" directory="newDataTrees" format="png" /> + <discover_datasets pattern="(?P<name>.*)" directory="newDataTrees" format="png"/> <filter>(plot)</filter> </collection> <collection type="list" label="Marker trees from mapping ${input.name} to ${reftree.name}" name="mtrees"> - <discover_datasets pattern="(?P<name>.*)" directory="newDataMarkers" format="pdf" /> + <discover_datasets pattern="(?P<name>.*)" directory="newDataMarkers" format="pdf"/> <filter>(mplot)</filter> </collection> </outputs> @@ -70,7 +70,7 @@ <param name="plot" value="FALSE"/> <param name="mplot" value="FALSE"/> <param name="mfi" value="mfi"/> - <output name="stats" file="out1.flowstat1" compare="sim_size"/> + <output name="stats" file="out1.flowstat1" compare="sim_size"/> <output name="mfistats" file="out1.flowstat2" compare="sim_size"/> <output name="allstats" file="out1.flowstat3" compare="sim_size"/> <output_collection name="output" type="list" count="3">