comparison facets_analysis.R @ 7:86bcdc94b008 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
author artbio
date Wed, 08 Oct 2025 17:41:18 +0000
parents 625038b7d764
children
comparison
equal deleted inserted replaced
6:625038b7d764 7:86bcdc94b008
79 ) 79 )
80 parser$add_argument("--merge_gap_rel", 80 parser$add_argument("--merge_gap_rel",
81 type = "double", default = 0.5, 81 type = "double", default = 0.5,
82 help = "Relative maximum gap (fraction of avg. segment length) to merge segments." 82 help = "Relative maximum gap (fraction of avg. segment length) to merge segments."
83 ) 83 )
84 84 parser$add_argument("--vcf_min_nhet",
85 type = "integer", default = 2,
86 help = "VCF Post-Filter: Minimum number of heterozygous SNPs for a segment to be kept."
87 )
88 parser$add_argument("--vcf_min_num_mark",
89 type = "integer", default = 3,
90 help = "VCF Post-Filter: Minimum number of total markers for a segment to be kept."
91 )
85 #' Classify CNV segments based on TCN/LCN 92 #' Classify CNV segments based on TCN/LCN
86 classify_cnv <- function(cncf_df) { 93 classify_cnv <- function(cncf_df) {
87 cncf_df$sv_type <- NA_character_ 94 cncf_df$sv_type <- NA_character_
88 cncf_df$sv_type[cncf_df$tcn.em == 2 & (cncf_df$lcn.em == 1 | is.na(cncf_df$lcn.em))] <- "NEUTR" 95 cncf_df$sv_type[cncf_df$tcn.em == 2 & (cncf_df$lcn.em == 1 | is.na(cncf_df$lcn.em))] <- "NEUTR"
89 cncf_df$sv_type[is.na(cncf_df$sv_type) & cncf_df$tcn.em > 2] <- "DUP" 96 cncf_df$sv_type[is.na(cncf_df$sv_type) & cncf_df$tcn.em > 2] <- "DUP"
281 cnv_df = cnv_calls, 288 cnv_df = cnv_calls,
282 max_gap_abs = args$merge_gap_abs, 289 max_gap_abs = args$merge_gap_abs,
283 max_gap_rel = args$merge_gap_rel 290 max_gap_rel = args$merge_gap_rel
284 ) 291 )
285 } 292 }
293 # Apply VCF post-filters to remove low-quality/artefactual segments
294 # This addresses the issue of FACETS' EM algorithm sometimes creating
295 # micro-segments that bypass the initial min.nhet segmentation parameter.
296 original_rows <- nrow(cnv_calls)
297 cnv_calls <- cnv_calls[
298 cnv_calls$nhet >= args$vcf_min_nhet &
299 cnv_calls$num.mark >= args$vcf_min_num_mark,
300 ]
301 cat(paste("Applied VCF post-filters: kept", nrow(cnv_calls), "of", original_rows, "segments.\n"))
302
286 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy) 303 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy)
287 304
288 vcf_body <- apply(cnv_calls, 1, function(seg) { 305 vcf_body <- apply(cnv_calls, 1, function(seg) {
289 cnv_calls <- merge_segments(cnv_calls) 306 cnv_calls <- merge_segments(cnv_calls)
290 alt_allele <- paste0("<", seg["svtype"], ">") 307 alt_allele <- paste0("<", seg["svtype"], ">")