Mercurial > repos > artbio > cnv_facets
comparison facets_analysis.R @ 2:66a56502199d draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
| author | artbio |
|---|---|
| date | Fri, 03 Oct 2025 23:59:36 +0000 |
| parents | |
| children | d1914f4d9daf |
comparison
equal
deleted
inserted
replaced
| 1:8e1fb7253c1d | 2:66a56502199d |
|---|---|
| 1 #!/usr/bin/env Rscript | |
| 2 | |
| 3 # Description: | |
| 4 # This script serves as the backend for the Galaxy FACETS Analysis tool. | |
| 5 # It takes a SNP pileup file as input and performs allele-specific copy | |
| 6 # number analysis using the R package 'facets'. | |
| 7 # ============================================================================== | |
| 8 | |
| 9 # --- 1. Load Libraries --- | |
| 10 suppressPackageStartupMessages(library(argparse)) | |
| 11 suppressPackageStartupMessages(library(facets)) | |
| 12 | |
| 13 # --- 2. Define and Parse Arguments --- | |
| 14 | |
| 15 # Create the parser | |
| 16 parser <- ArgumentParser(description = "Run FACETS algorithm on a SNP pileup file.") | |
| 17 | |
| 18 # Define arguments | |
| 19 parser$add_argument("--pileup", | |
| 20 type = "character", required = TRUE, | |
| 21 help = "Path to the gzipped pileup CSV file." | |
| 22 ) | |
| 23 parser$add_argument("--sample_id", | |
| 24 type = "character", required = TRUE, | |
| 25 help = "Sample ID used for plot titles and metadata." | |
| 26 ) | |
| 27 | |
| 28 parser$add_argument("--output_seg", | |
| 29 type = "character", required = TRUE, | |
| 30 help = "Path for the output segmentation file (TSV)." | |
| 31 ) | |
| 32 parser$add_argument("--output_summary", | |
| 33 type = "character", required = TRUE, | |
| 34 help = "Path for the output summary file (TSV)." | |
| 35 ) | |
| 36 parser$add_argument("--output_plots", | |
| 37 type = "character", required = TRUE, | |
| 38 help = "Path for the output plots file (PDF)." | |
| 39 ) | |
| 40 | |
| 41 parser$add_argument("--cval", | |
| 42 type = "double", default = 150, | |
| 43 help = "Critical value for segmentation." | |
| 44 ) | |
| 45 parser$add_argument("--min_nhet", | |
| 46 type = "integer", default = 25, | |
| 47 help = "Minimum number of heterozygous SNPs per segment." | |
| 48 ) | |
| 49 parser$add_argument("--snp_nbhd", | |
| 50 type = "integer", default = 300, | |
| 51 help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs." | |
| 52 ) | |
| 53 parser$add_argument("--gbuild", | |
| 54 type = "character", default = "hg38", | |
| 55 choices = c("hg19", "hg38", "hg18", "mm9", "mm10"), | |
| 56 help = "Genome build used for alignment." | |
| 57 ) | |
| 58 | |
| 59 # --- 3. Main Analysis Function --- | |
| 60 main <- function(args) { | |
| 61 # Set seed for reproducibility | |
| 62 set.seed(1965) | |
| 63 | |
| 64 # --- Read the data with readSnpMatrix() from facets --- | |
| 65 rcmat <- readSnpMatrix(args$pileup) | |
| 66 | |
| 67 # --- Pre-process sample --- | |
| 68 xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd) | |
| 69 | |
| 70 # --- Process sample (segmentation) --- | |
| 71 oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet) | |
| 72 | |
| 73 # --- Estimate ploidy/purity --- | |
| 74 fit <- emcncf(oo) | |
| 75 | |
| 76 # Write the main segmentation file | |
| 77 cncf_output <- fit$cncf | |
| 78 if (nrow(cncf_output) > 0) { | |
| 79 cncf_output$purity <- fit$purity | |
| 80 cncf_output$ploidy <- fit$ploidy | |
| 81 # Reorder columns to have purity/ploidy first for clarity | |
| 82 cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))] | |
| 83 } | |
| 84 write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE) | |
| 85 | |
| 86 # Write a key-value summary file | |
| 87 # A NULL value is replaced by NA to preserve vector length. | |
| 88 summary_df <- data.frame( | |
| 89 Parameter = c( | |
| 90 "sample_id", "purity", "ploidy", "dipLogR", "loglik", | |
| 91 "cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param" | |
| 92 ), | |
| 93 Value = c( | |
| 94 args$sample_id, | |
| 95 ifelse(is.null(fit$purity), NA, fit$purity), | |
| 96 ifelse(is.null(fit$ploidy), NA, fit$ploidy), | |
| 97 ifelse(is.null(fit$dipLogR), NA, fit$dipLogR), | |
| 98 ifelse(is.null(fit$loglik), NA, fit$loglik), | |
| 99 args$cval, | |
| 100 args$min_nhet, | |
| 101 args$snp_nbhd, | |
| 102 args$gbuild | |
| 103 ) | |
| 104 ) | |
| 105 write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE) | |
| 106 | |
| 107 # Generate the plots PDF | |
| 108 pdf(file = args$output_plots, width = 12, height = 8) | |
| 109 plotSample(x = oo, emfit = fit, sname = args$sample_id) | |
| 110 dev.off() | |
| 111 } | |
| 112 | |
| 113 # --- 4. Execution Block --- | |
| 114 if (!interactive()) { | |
| 115 args <- parser$parse_args() | |
| 116 main(args) | |
| 117 } |
