Mercurial > repos > artbio > cnv_facets
comparison facets_analysis.R @ 7:86bcdc94b008 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
| author | artbio |
|---|---|
| date | Wed, 08 Oct 2025 17:41:18 +0000 |
| parents | 625038b7d764 |
| children |
comparison
equal
deleted
inserted
replaced
| 6:625038b7d764 | 7:86bcdc94b008 |
|---|---|
| 79 ) | 79 ) |
| 80 parser$add_argument("--merge_gap_rel", | 80 parser$add_argument("--merge_gap_rel", |
| 81 type = "double", default = 0.5, | 81 type = "double", default = 0.5, |
| 82 help = "Relative maximum gap (fraction of avg. segment length) to merge segments." | 82 help = "Relative maximum gap (fraction of avg. segment length) to merge segments." |
| 83 ) | 83 ) |
| 84 | 84 parser$add_argument("--vcf_min_nhet", |
| 85 type = "integer", default = 2, | |
| 86 help = "VCF Post-Filter: Minimum number of heterozygous SNPs for a segment to be kept." | |
| 87 ) | |
| 88 parser$add_argument("--vcf_min_num_mark", | |
| 89 type = "integer", default = 3, | |
| 90 help = "VCF Post-Filter: Minimum number of total markers for a segment to be kept." | |
| 91 ) | |
| 85 #' Classify CNV segments based on TCN/LCN | 92 #' Classify CNV segments based on TCN/LCN |
| 86 classify_cnv <- function(cncf_df) { | 93 classify_cnv <- function(cncf_df) { |
| 87 cncf_df$sv_type <- NA_character_ | 94 cncf_df$sv_type <- NA_character_ |
| 88 cncf_df$sv_type[cncf_df$tcn.em == 2 & (cncf_df$lcn.em == 1 | is.na(cncf_df$lcn.em))] <- "NEUTR" | 95 cncf_df$sv_type[cncf_df$tcn.em == 2 & (cncf_df$lcn.em == 1 | is.na(cncf_df$lcn.em))] <- "NEUTR" |
| 89 cncf_df$sv_type[is.na(cncf_df$sv_type) & cncf_df$tcn.em > 2] <- "DUP" | 96 cncf_df$sv_type[is.na(cncf_df$sv_type) & cncf_df$tcn.em > 2] <- "DUP" |
| 281 cnv_df = cnv_calls, | 288 cnv_df = cnv_calls, |
| 282 max_gap_abs = args$merge_gap_abs, | 289 max_gap_abs = args$merge_gap_abs, |
| 283 max_gap_rel = args$merge_gap_rel | 290 max_gap_rel = args$merge_gap_rel |
| 284 ) | 291 ) |
| 285 } | 292 } |
| 293 # Apply VCF post-filters to remove low-quality/artefactual segments | |
| 294 # This addresses the issue of FACETS' EM algorithm sometimes creating | |
| 295 # micro-segments that bypass the initial min.nhet segmentation parameter. | |
| 296 original_rows <- nrow(cnv_calls) | |
| 297 cnv_calls <- cnv_calls[ | |
| 298 cnv_calls$nhet >= args$vcf_min_nhet & | |
| 299 cnv_calls$num.mark >= args$vcf_min_num_mark, | |
| 300 ] | |
| 301 cat(paste("Applied VCF post-filters: kept", nrow(cnv_calls), "of", original_rows, "segments.\n")) | |
| 302 | |
| 286 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy) | 303 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy) |
| 287 | 304 |
| 288 vcf_body <- apply(cnv_calls, 1, function(seg) { | 305 vcf_body <- apply(cnv_calls, 1, function(seg) { |
| 289 cnv_calls <- merge_segments(cnv_calls) | 306 cnv_calls <- merge_segments(cnv_calls) |
| 290 alt_allele <- paste0("<", seg["svtype"], ">") | 307 alt_allele <- paste0("<", seg["svtype"], ">") |
