cnv_facets: facets_analysis.R comparison

comparison facets_analysis.R @ 2:66a56502199d draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e

author	artbio
date	Fri, 03 Oct 2025 23:59:36 +0000
parents
children	d1914f4d9daf

comparison

equal deleted inserted replaced

-:8e1fb7253c1d
+:66a56502199d
+#!/usr/bin/env Rscript
+# Description:
+#   This script serves as the backend for the Galaxy FACETS Analysis tool.
+#   It takes a SNP pileup file as input and performs allele-specific copy
+#   number analysis using the R package 'facets'.
+# ==============================================================================
+# --- 1. Load Libraries ---
+suppressPackageStartupMessages(library(argparse))
+suppressPackageStartupMessages(library(facets))
+# --- 2. Define and Parse Arguments ---
+# Create the parser
+parser <- ArgumentParser(description = "Run FACETS algorithm on a SNP pileup file.")
+# Define arguments
+parser$add_argument("--pileup",
+type = "character", required = TRUE,
+help = "Path to the gzipped pileup CSV file."
+)
+parser$add_argument("--sample_id",
+type = "character", required = TRUE,
+help = "Sample ID used for plot titles and metadata."
+)
+parser$add_argument("--output_seg",
+type = "character", required = TRUE,
+help = "Path for the output segmentation file (TSV)."
+)
+parser$add_argument("--output_summary",
+type = "character", required = TRUE,
+help = "Path for the output summary file (TSV)."
+)
+parser$add_argument("--output_plots",
+type = "character", required = TRUE,
+help = "Path for the output plots file (PDF)."
+)
+parser$add_argument("--cval",
+type = "double", default = 150,
+help = "Critical value for segmentation."
+)
+parser$add_argument("--min_nhet",
+type = "integer", default = 25,
+help = "Minimum number of heterozygous SNPs per segment."
+)
+parser$add_argument("--snp_nbhd",
+type = "integer", default = 300,
+help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs."
+)
+parser$add_argument("--gbuild",
+type = "character", default = "hg38",
+choices = c("hg19", "hg38", "hg18", "mm9", "mm10"),
+help = "Genome build used for alignment."
+)
+# --- 3. Main Analysis Function ---
+main <- function(args) {
+# Set seed for reproducibility
+set.seed(1965)
+# --- Read the data with readSnpMatrix() from facets ---
+rcmat <- readSnpMatrix(args$pileup)
+# --- Pre-process sample ---
+xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd)
+# --- Process sample (segmentation) ---
+oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet)
+# --- Estimate ploidy/purity ---
+fit <- emcncf(oo)
+# Write the main segmentation file
+cncf_output <- fit$cncf
+if (nrow(cncf_output) > 0) {
+cncf_output$purity <- fit$purity
+cncf_output$ploidy <- fit$ploidy
+# Reorder columns to have purity/ploidy first for clarity
+cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))]
+}
+write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE)
+# Write a key-value summary file
+# A NULL value is replaced by NA to preserve vector length.
+summary_df <- data.frame(
+Parameter = c(
+"sample_id", "purity", "ploidy", "dipLogR", "loglik",
+"cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param"
+),
+Value = c(
+args$sample_id,
+ifelse(is.null(fit$purity), NA, fit$purity),
+ifelse(is.null(fit$ploidy), NA, fit$ploidy),
+ifelse(is.null(fit$dipLogR), NA, fit$dipLogR),
+ifelse(is.null(fit$loglik), NA, fit$loglik),
+args$cval,
+args$min_nhet,
+args$snp_nbhd,
+args$gbuild
+)
+)
+write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE)
+# Generate the plots PDF
+pdf(file = args$output_plots, width = 12, height = 8)
+plotSample(x = oo, emfit = fit, sname = args$sample_id)
+dev.off()
+}
+# --- 4. Execution Block ---
+if (!interactive()) {
+args <- parser$parse_args()
+main(args)
+}

Mercurial > repos > artbio > cnv_facets

comparison facets_analysis.R @ 2:66a56502199d draft