comparison facets_analysis.R @ 2:66a56502199d draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
author artbio
date Fri, 03 Oct 2025 23:59:36 +0000
parents
children d1914f4d9daf
comparison
equal deleted inserted replaced
1:8e1fb7253c1d 2:66a56502199d
1 #!/usr/bin/env Rscript
2
3 # Description:
4 # This script serves as the backend for the Galaxy FACETS Analysis tool.
5 # It takes a SNP pileup file as input and performs allele-specific copy
6 # number analysis using the R package 'facets'.
7 # ==============================================================================
8
9 # --- 1. Load Libraries ---
10 suppressPackageStartupMessages(library(argparse))
11 suppressPackageStartupMessages(library(facets))
12
13 # --- 2. Define and Parse Arguments ---
14
15 # Create the parser
16 parser <- ArgumentParser(description = "Run FACETS algorithm on a SNP pileup file.")
17
18 # Define arguments
19 parser$add_argument("--pileup",
20 type = "character", required = TRUE,
21 help = "Path to the gzipped pileup CSV file."
22 )
23 parser$add_argument("--sample_id",
24 type = "character", required = TRUE,
25 help = "Sample ID used for plot titles and metadata."
26 )
27
28 parser$add_argument("--output_seg",
29 type = "character", required = TRUE,
30 help = "Path for the output segmentation file (TSV)."
31 )
32 parser$add_argument("--output_summary",
33 type = "character", required = TRUE,
34 help = "Path for the output summary file (TSV)."
35 )
36 parser$add_argument("--output_plots",
37 type = "character", required = TRUE,
38 help = "Path for the output plots file (PDF)."
39 )
40
41 parser$add_argument("--cval",
42 type = "double", default = 150,
43 help = "Critical value for segmentation."
44 )
45 parser$add_argument("--min_nhet",
46 type = "integer", default = 25,
47 help = "Minimum number of heterozygous SNPs per segment."
48 )
49 parser$add_argument("--snp_nbhd",
50 type = "integer", default = 300,
51 help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs."
52 )
53 parser$add_argument("--gbuild",
54 type = "character", default = "hg38",
55 choices = c("hg19", "hg38", "hg18", "mm9", "mm10"),
56 help = "Genome build used for alignment."
57 )
58
59 # --- 3. Main Analysis Function ---
60 main <- function(args) {
61 # Set seed for reproducibility
62 set.seed(1965)
63
64 # --- Read the data with readSnpMatrix() from facets ---
65 rcmat <- readSnpMatrix(args$pileup)
66
67 # --- Pre-process sample ---
68 xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd)
69
70 # --- Process sample (segmentation) ---
71 oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet)
72
73 # --- Estimate ploidy/purity ---
74 fit <- emcncf(oo)
75
76 # Write the main segmentation file
77 cncf_output <- fit$cncf
78 if (nrow(cncf_output) > 0) {
79 cncf_output$purity <- fit$purity
80 cncf_output$ploidy <- fit$ploidy
81 # Reorder columns to have purity/ploidy first for clarity
82 cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))]
83 }
84 write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE)
85
86 # Write a key-value summary file
87 # A NULL value is replaced by NA to preserve vector length.
88 summary_df <- data.frame(
89 Parameter = c(
90 "sample_id", "purity", "ploidy", "dipLogR", "loglik",
91 "cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param"
92 ),
93 Value = c(
94 args$sample_id,
95 ifelse(is.null(fit$purity), NA, fit$purity),
96 ifelse(is.null(fit$ploidy), NA, fit$ploidy),
97 ifelse(is.null(fit$dipLogR), NA, fit$dipLogR),
98 ifelse(is.null(fit$loglik), NA, fit$loglik),
99 args$cval,
100 args$min_nhet,
101 args$snp_nbhd,
102 args$gbuild
103 )
104 )
105 write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE)
106
107 # Generate the plots PDF
108 pdf(file = args$output_plots, width = 12, height = 8)
109 plotSample(x = oo, emfit = fit, sname = args$sample_id)
110 dev.off()
111 }
112
113 # --- 4. Execution Block ---
114 if (!interactive()) {
115 args <- parser$parse_args()
116 main(args)
117 }