Mercurial > repos > artbio > cnv_facets
diff facets_analysis.R @ 2:66a56502199d draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
| author | artbio |
|---|---|
| date | Fri, 03 Oct 2025 23:59:36 +0000 |
| parents | |
| children | d1914f4d9daf |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/facets_analysis.R Fri Oct 03 23:59:36 2025 +0000 @@ -0,0 +1,117 @@ +#!/usr/bin/env Rscript + +# Description: +# This script serves as the backend for the Galaxy FACETS Analysis tool. +# It takes a SNP pileup file as input and performs allele-specific copy +# number analysis using the R package 'facets'. +# ============================================================================== + +# --- 1. Load Libraries --- +suppressPackageStartupMessages(library(argparse)) +suppressPackageStartupMessages(library(facets)) + +# --- 2. Define and Parse Arguments --- + +# Create the parser +parser <- ArgumentParser(description = "Run FACETS algorithm on a SNP pileup file.") + +# Define arguments +parser$add_argument("--pileup", + type = "character", required = TRUE, + help = "Path to the gzipped pileup CSV file." +) +parser$add_argument("--sample_id", + type = "character", required = TRUE, + help = "Sample ID used for plot titles and metadata." +) + +parser$add_argument("--output_seg", + type = "character", required = TRUE, + help = "Path for the output segmentation file (TSV)." +) +parser$add_argument("--output_summary", + type = "character", required = TRUE, + help = "Path for the output summary file (TSV)." +) +parser$add_argument("--output_plots", + type = "character", required = TRUE, + help = "Path for the output plots file (PDF)." +) + +parser$add_argument("--cval", + type = "double", default = 150, + help = "Critical value for segmentation." +) +parser$add_argument("--min_nhet", + type = "integer", default = 25, + help = "Minimum number of heterozygous SNPs per segment." +) +parser$add_argument("--snp_nbhd", + type = "integer", default = 300, + help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs." +) +parser$add_argument("--gbuild", + type = "character", default = "hg38", + choices = c("hg19", "hg38", "hg18", "mm9", "mm10"), + help = "Genome build used for alignment." +) + +# --- 3. Main Analysis Function --- +main <- function(args) { + # Set seed for reproducibility + set.seed(1965) + + # --- Read the data with readSnpMatrix() from facets --- + rcmat <- readSnpMatrix(args$pileup) + + # --- Pre-process sample --- + xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd) + + # --- Process sample (segmentation) --- + oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet) + + # --- Estimate ploidy/purity --- + fit <- emcncf(oo) + + # Write the main segmentation file + cncf_output <- fit$cncf + if (nrow(cncf_output) > 0) { + cncf_output$purity <- fit$purity + cncf_output$ploidy <- fit$ploidy + # Reorder columns to have purity/ploidy first for clarity + cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))] + } + write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE) + + # Write a key-value summary file + # A NULL value is replaced by NA to preserve vector length. + summary_df <- data.frame( + Parameter = c( + "sample_id", "purity", "ploidy", "dipLogR", "loglik", + "cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param" + ), + Value = c( + args$sample_id, + ifelse(is.null(fit$purity), NA, fit$purity), + ifelse(is.null(fit$ploidy), NA, fit$ploidy), + ifelse(is.null(fit$dipLogR), NA, fit$dipLogR), + ifelse(is.null(fit$loglik), NA, fit$loglik), + args$cval, + args$min_nhet, + args$snp_nbhd, + args$gbuild + ) + ) + write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE) + + # Generate the plots PDF + pdf(file = args$output_plots, width = 12, height = 8) + plotSample(x = oo, emfit = fit, sname = args$sample_id) + dev.off() +} + +# --- 4. Execution Block --- +if (!interactive()) { + args <- parser$parse_args() + main(args) +}
