annotate facets_analysis.R @ 8:e8a8a4910e32 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2a0f9aee1c61e12ab9f0e25a6ba7db5c08b67fe6
author artbio
date Thu, 09 Oct 2025 17:14:30 +0000
parents 86bcdc94b008
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
1 #!/usr/bin/env Rscript
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
3 # Description:
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
4 # This script serves as the backend for the Galaxy FACETS Analysis tool.
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
5 # It takes a SNP pileup file as input and performs allele-specific copy
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
6 # number analysis using the R package 'facets'.
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
7 # ==============================================================================
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
8
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
9 # --- Load Libraries ---
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
10 suppressPackageStartupMessages(library(argparse))
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
11 suppressPackageStartupMessages(library(facets))
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
12
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
# --- Source the external plot_facets_enhanced function ---
# Rscript records the path of the running script in a "--file=<path>" entry of
# the full (non-trailing) command line. That entry is used to find the
# directory of this script so the helper file can be sourced relative to it.
initial_opts <- commandArgs(trailingOnly = FALSE)
file_flag <- "--file="
# Anchored prefix match: only arguments that *start* with "--file=" qualify.
file_opts <- initial_opts[startsWith(initial_opts, file_flag)]
if (length(file_opts) == 0) {
  # Without this guard source() would receive character(0) and fail with an
  # error that does not mention the real cause.
  stop(
    "Cannot determine the script location: no '--file=' argument found. ",
    "Run this script with Rscript.",
    call. = FALSE
  )
}
# Use the first match only, so script_path is always a single directory.
script_path <- dirname(substring(file_opts[1], nchar(file_flag) + 1))
source(file.path(script_path, "plot_facets_enhanced-v22.R"))
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
19
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
# --- Define Command-Line Arguments ---

# Parser object that collects every option accepted by this script.
parser <- ArgumentParser(description = "Run FACETS algorithm on a SNP pileup file.")

# Input and sample identification (mandatory).
parser$add_argument(
  "--pileup",
  type = "character", required = TRUE,
  help = "Path to the gzipped pileup CSV file."
)
parser$add_argument(
  "--sample_id",
  type = "character", required = TRUE,
  help = "Sample ID used for plot titles and metadata."
)

# Output locations (all mandatory).
parser$add_argument(
  "--output_seg",
  type = "character", required = TRUE,
  help = "Path for the output segmentation file (TSV)."
)
parser$add_argument(
  "--output_summary",
  type = "character", required = TRUE,
  help = "Path for the output summary file (TSV)."
)
parser$add_argument(
  "--output_plots",
  type = "character", required = TRUE,
  help = "Path for the main output plots file (PNG)."
)
parser$add_argument(
  "--output_spider",
  type = "character", required = TRUE,
  help = "Path for the diagnostic spider plot file (PNG)."
)
parser$add_argument(
  "--output_vcf",
  type = "character", required = TRUE,
  help = "Path for the output VCF file with CNV calls."
)

# Tuning parameters (optional, with defaults).
parser$add_argument(
  "--cval",
  type = "double", default = 150,
  help = "Critical value for segmentation."
)
parser$add_argument(
  "--min_nhet",
  type = "integer", default = 25,
  help = "Minimum number of heterozygous SNPs per segment."
)
parser$add_argument(
  "--snp_nbhd",
  type = "integer", default = 300,
  help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs."
)
parser$add_argument(
  "--gbuild",
  type = "character", default = "hg38",
  choices = c("hg19", "hg38", "hg18", "mm9", "mm10"),
  help = "Genome build used for alignment."
)

# Optional merging of adjacent, similar CNV segments.
parser$add_argument(
  "--enable_merging",
  action = "store_true", default = FALSE,
  help = "If specified, enables the post-processing step to merge adjacent and similar CNV segments."
)
parser$add_argument(
  "--merge_gap_abs",
  type = "integer", default = 1000000,
  help = "Absolute maximum gap in bp to merge adjacent CNV segments."
)
parser$add_argument(
  "--merge_gap_rel",
  type = "double", default = 0.5,
  help = "Relative maximum gap (fraction of avg. segment length) to merge segments."
)

# Post-filters applied to the VCF output.
parser$add_argument(
  "--vcf_min_nhet",
  type = "integer", default = 2,
  help = "VCF Post-Filter: Minimum number of heterozygous SNPs for a segment to be kept."
)
parser$add_argument(
  "--vcf_min_num_mark",
  type = "integer", default = 3,
  help = "VCF Post-Filter: Minimum number of total markers for a segment to be kept."
)
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
#' Classify CNV segments based on TCN/LCN
#'
#' Adds a character column `sv_type` derived from the `tcn.em` and `lcn.em`
#' columns. Rules are applied in order and a later rule only fills rows that
#' are still unlabelled:
#'   1. tcn.em == 2 and lcn.em == 1 (or lcn.em missing) -> "NEUTR"
#'   2. tcn.em > 2                                      -> "DUP"
#'   3. tcn.em < 2 and lcn.em > 0                       -> "HEMIZYG_DEL"
#'   4. tcn.em < 2 and lcn.em == 0                      -> "HOMOZYG_DEL"
#'   5. tcn.em == 2 and lcn.em == 0                     -> "CN_LOH"
#'   6. tcn.em < 2 with lcn.em missing                  -> "DEL"
#' Rows matching no rule (e.g. missing tcn.em) keep `NA`.
#'
#' @param cncf_df A data frame with numeric columns `tcn.em` and `lcn.em`.
#'   A zero-row data frame is accepted.
#' @return `cncf_df` with the additional (or overwritten) column `sv_type`.
classify_cnv <- function(cncf_df) {
  tcn <- cncf_df$tcn.em
  lcn <- cncf_df$lcn.em
  lcn_known <- !is.na(lcn)

  # Build the labels as a plain vector sized to the table. Assigning a scalar
  # with `cncf_df$sv_type <- NA_character_` errors on a zero-row data frame.
  sv_type <- rep(NA_character_, nrow(cncf_df))

  # which() drops NA comparisons, so rows with missing copy numbers are simply
  # not selected by a rule.
  sv_type[which(tcn == 2 & (lcn == 1 | is.na(lcn)))] <- "NEUTR"
  sv_type[which(is.na(sv_type) & tcn > 2)] <- "DUP"
  # NOTE(review): if lcn.em is the minor-allele copy number it should not
  # exceed tcn.em / 2, which would make the HEMIZYG_DEL rule unreachable and
  # label single-copy losses (tcn.em == 1, lcn.em == 0) as HOMOZYG_DEL.
  # Labels are left unchanged here; confirm the intended definitions.
  sv_type[which(is.na(sv_type) & tcn < 2 & lcn_known & lcn > 0)] <- "HEMIZYG_DEL"
  sv_type[which(is.na(sv_type) & tcn < 2 & lcn_known & lcn == 0)] <- "HOMOZYG_DEL"
  sv_type[which(is.na(sv_type) & tcn == 2 & lcn_known & lcn == 0)] <- "CN_LOH"

  # Remaining unlabelled losses (tcn.em < 2 but lcn.em is NA) get a general type.
  sv_type[which(is.na(sv_type) & tcn < 2)] <- "DEL"

  cncf_df$sv_type <- sv_type
  cncf_df
}
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
106
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
#' Create a VCF header (explicit version)
#'
#' Assembles the meta-information lines and the column header line of a
#' VCFv4.2 file as a character vector, one element per header line.
#'
#' @param sample_id Sample identifier. Accepted for interface compatibility;
#'   the header built here does not use it.
#' @param purity Numeric purity estimate, written rounded to 4 decimals.
#' @param ploidy Numeric ploidy estimate, written rounded to 4 decimals.
#' @return A character vector of header lines, ending with the `#CHROM` line.
create_vcf_header <- function(sample_id, purity, ploidy) {
  # File-level metadata: format version, creation date, producing package.
  file_meta <- c(
    "##fileformat=VCFv4.2",
    paste0("##fileDate=", format(Sys.Date(), "%Y%m%d")),
    paste0("##source=FACETS_v", packageVersion("facets"))
  )

  # INFO fields describing the variant span and type.
  variant_info <- c(
    "##INFO=<ID=END,Number=1,Type=Integer,Description=\"End position of the variant\">",
    "##INFO=<ID=SVTYPE,Number=1,Type=String,Description=\"Type of structural variant (standard VCF tags: DEL, DUP, CNV)\">",
    "##INFO=<ID=SVLEN,Number=1,Type=Integer,Description=\"Length of the SV\">"
  )

  # INFO field carrying the finer-grained event label.
  event_info <- "##INFO=<ID=EVENT,Number=1,Type=String,Description=\"FACETS event classification. Possible values: DUP, HEMIZYG_DEL, HOMOZYG_DEL, CN_LOH\">"

  # INFO fields for copy numbers and marker counts.
  segment_info <- c(
    "##INFO=<ID=TCN,Number=1,Type=Integer,Description=\"Total Copy Number (EM fit)\">",
    "##INFO=<ID=LCN,Number=1,Type=Integer,Description=\"Lesser Copy Number (EM fit)\">",
    "##INFO=<ID=NUM_MARK,Number=1,Type=Integer,Description=\"Number of SNPs in the segment\">",
    "##INFO=<ID=NHET,Number=1,Type=Integer,Description=\"Number of heterozygous SNPs in the segment\">"
  )

  # Sample-level fit results recorded as custom meta lines.
  fit_meta <- c(
    paste0("##FACETS_PURITY=", round(purity, 4)),
    paste0("##FACETS_PLOIDY=", round(ploidy, 4))
  )

  # Column header: eight fixed VCF columns, no FORMAT/sample columns.
  column_line <- "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"

  c(file_meta, variant_info, event_info, segment_info, fit_meta, column_line)
}
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
129
6
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
130 # ==============================================================================
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
131 # Function to merge adjacent and similar CNV segments
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
132 #
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
133 # This function implements a merging algorithm that reflects human judgement
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
134 # by using a hybrid proximity condition. Two segments are merged
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
135 # if they have the same CNV state and are close to each other, both in absolute
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
136 # and relative terms.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
137 #
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
138 # @param cnv_df A data frame of CNV calls, expected to have columns like
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
139 # 'chrom', 'start', 'end', 'svtype', 'tcn.em', 'lcn.em', etc.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
140 # @param max_gap_abs An integer. The absolute maximum distance (in bp) allowed
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
141 # between two segments to consider them for merging. Acts as a safeguard.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
142 # @param max_gap_rel A numeric value (0 to 1). The maximum relative distance,
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
143 # expressed as a fraction of the average length of the two adjacent segments.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
144 # @return A new data frame with the similar adjacent segments merged.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
145 # ==============================================================================
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
146 merge_segments <- function(cnv_df, max_gap_abs = 1000000, max_gap_rel = 0.5) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
147 # If there's nothing to merge, return the original data frame
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
148 if (nrow(cnv_df) < 2) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
149 return(cnv_df)
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
150 }
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
151
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
152 # Ensure the data frame is sorted by genomic position
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
153 cnv_df <- cnv_df[order(cnv_df$chrom, cnv_df$start), ]
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
154
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
155 merged_rows <- list()
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
156 current_row <- cnv_df[1, ]
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
157
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
158 for (i in 2:nrow(cnv_df)) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
159 next_row <- cnv_df[i, ]
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
160
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
161 # Basic criteria: segments must be of the same type and CN state
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
162 same_chrom <- current_row$chrom == next_row$chrom
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
163 same_svtype <- current_row$svtype == next_row$svtype
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
164 same_event <- current_row$event == next_row$event
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
165 same_tcn <- current_row$tcn.em == next_row$tcn.em
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
166 same_lcn <- identical(current_row$lcn.em, next_row$lcn.em) # Handles NA safely
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
167
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
168 # If basic criteria are met, evaluate proximity
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
169 if (same_chrom && same_svtype && same_event && same_tcn && same_lcn) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
170 gap <- next_row$start - current_row$end
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
171
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
172 # Calculate the relative threshold based on the average size of the two segments
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
173 len_a <- current_row$end - current_row$start
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
174 len_b <- next_row$end - next_row$start
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
175 relative_threshold <- ((len_a + len_b) / 2) * max_gap_rel
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
176
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
177 # Hybrid merge condition: gap must be below BOTH absolute and relative thresholds
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
178 if (gap >= 0 && gap <= max_gap_abs && gap <= relative_threshold) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
179 # Merge: update the end of the current segment and aggregate numeric fields
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
180 current_row$end <- next_row$end
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
181 current_row$num.mark <- current_row$num.mark + next_row$num.mark
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
182 current_row$nhet <- current_row$nhet + next_row$nhet
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
183
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
184 # Skip to the next iteration to try merging the newly expanded segment
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
185 # with the one that follows.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
186 next
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
187 }
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
188 }
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
189
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
190 # If no merge occurred, the current segment is final. Save it.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
191 merged_rows <- append(merged_rows, list(current_row))
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
192 # The next segment becomes the new current segment.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
193 current_row <- next_row
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
194 }
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
195
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
196 # Append the very last segment (which is either a standalone or the result of the last merge)
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
197 merged_rows <- append(merged_rows, list(current_row))
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
198
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
199 # Reconstruct a single data frame from the list of merged rows
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
200 do.call(rbind, merged_rows)
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
201 }
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
202 # --- Main Analysis Function ---
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
203 main <- function(args) {
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
204 # Set seed for reproducibility
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
205 set.seed(1965)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
206
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
207 # --- Read the data with readSnpMatrix() from facets ---
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
208 rcmat <- readSnpMatrix(args$pileup)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
209
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
210 # --- Pre-process sample ---
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
211 xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
212
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
213 # --- Process sample (segmentation) ---
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
214 oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
215
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
216 # --- Estimate ploidy/purity ---
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
217 fit <- emcncf(oo)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
218
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
219 # Write the main segmentation file
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
220 cncf_output <- fit$cncf
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
221 if (nrow(cncf_output) > 0) {
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
222 cncf_output$purity <- fit$purity
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
223 cncf_output$ploidy <- fit$ploidy
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
224 # Reorder columns to have purity/ploidy first for clarity
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
225 cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))]
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
226 }
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
227 write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
228
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
229 # Write a key-value summary file
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
230 # A NULL value is replaced by NA to preserve vector length.
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
231 summary_df <- data.frame(
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
232 Parameter = c(
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
233 "sample_id", "purity", "ploidy", "dipLogR", "loglik",
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
234 "cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param"
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
235 ),
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
236 Value = c(
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
237 args$sample_id,
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
238 ifelse(is.null(fit$purity), NA, fit$purity),
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
239 ifelse(is.null(fit$ploidy), NA, fit$ploidy),
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
240 ifelse(is.null(fit$dipLogR), NA, fit$dipLogR),
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
241 ifelse(is.null(fit$loglik), NA, fit$loglik),
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
242 args$cval,
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
243 args$min_nhet,
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
244 args$snp_nbhd,
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
245 args$gbuild
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
246 )
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
247 )
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
248 write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE)
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
249
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
250 # Generate the plots PNG
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
251 png(file = args$output_plots, width = 12, height = 8, units = "in", res = 300)
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
252 plotSample(x = oo, emfit = fit, sname = args$sample_id)
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
253 plot_facets_enhanced(oo, emfit = fit, plot.type = "em", sname = args$sample_id)
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
254 dev.off()
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
255 png(file = args$output_spider, width = 8, height = 8, units = "in", res = 300)
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
256 logRlogORspider(oo$out, oo$dipLogR)
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
257 dev.off()
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
258
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
259 # --- Generate VCF file ---
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
260
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
261 # Classify segments and define standard SVTYPEs + detailed EVENTs
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
262 cncf_for_vcf <- fit$cncf
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
263 if (nrow(cncf_for_vcf) > 0) {
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
264 cncf_for_vcf$svtype <- NA_character_
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
265 cncf_for_vcf$event <- NA_character_
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
266
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
267 # Duplications
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
268 cncf_for_vcf[cncf_for_vcf$tcn.em > 2, c("svtype", "event")] <- c("DUP", "DUP")
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
269
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
270 # Deletions
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
271 cncf_for_vcf[cncf_for_vcf$tcn.em < 2, c("svtype")] <- "DEL"
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
272 cncf_for_vcf[cncf_for_vcf$tcn.em == 1, c("event")] <- "HEMIZYG_DEL"
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
273 cncf_for_vcf[cncf_for_vcf$tcn.em == 0, c("event")] <- "HOMOZYG_DEL"
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
274
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
275 # Copy-Neutral LOH
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
276 cncf_for_vcf[cncf_for_vcf$tcn.em == 2 & !is.na(cncf_for_vcf$lcn.em) & cncf_for_vcf$lcn.em == 0, c("svtype", "event")] <- c("CNV", "CN_LOH")
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
277
6
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
278 # Filter out normal segments (where 'svtype' is still NA)
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
279 cnv_calls <- cncf_for_vcf[!is.na(cncf_for_vcf$svtype), ]
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
280 } else {
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
281 cnv_calls <- data.frame()
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
282 }
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
283
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
284
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
285 if (nrow(cnv_calls) > 0) {
6
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
286 if (args$enable_merging) {
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
287 cnv_calls <- merge_segments(
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
288 cnv_df = cnv_calls,
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
289 max_gap_abs = args$merge_gap_abs,
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
290 max_gap_rel = args$merge_gap_rel
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
291 )
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
292 }
7
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
293 # Apply VCF post-filters to remove low-quality/artefactual segments
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
294 # This addresses the issue of FACETS' EM algorithm sometimes creating
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
295 # micro-segments that bypass the initial min.nhet segmentation parameter.
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
296 original_rows <- nrow(cnv_calls)
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
297 cnv_calls <- cnv_calls[
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
298 cnv_calls$nhet >= args$vcf_min_nhet &
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
299 cnv_calls$num.mark >= args$vcf_min_num_mark,
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
300 ]
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
301 cat(paste("Applied VCF post-filters: kept", nrow(cnv_calls), "of", original_rows, "segments.\n"))
86bcdc94b008 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents: 6
diff changeset
302
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
303 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy)
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
304
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
305 vcf_body <- apply(cnv_calls, 1, function(seg) {
6
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
306 cnv_calls <- merge_segments(cnv_calls)
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
307 alt_allele <- paste0("<", seg["svtype"], ">")
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
308 info <- paste0(
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
309 "END=", seg["end"],
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
310 ";SVTYPE=", seg["svtype"],
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
311 ";SVLEN=", as.integer(seg["end"]) - as.integer(seg["start"]),
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
312 ";TCN=", seg["tcn.em"],
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
313 ";LCN=", ifelse(is.na(seg["lcn.em"]), ".", seg["lcn.em"]),
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
314 ";EVENT=", seg["event"],
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
315 ";NUM_MARK=", seg["num.mark"],
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
316 ";NHET=", seg["nhet"]
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
317 )
6
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
318 # Remove any space(s) immediately following an '=' sign in the INFO string.
625038b7d764 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents: 4
diff changeset
319 info <- gsub("=\\s+", "=", info)
4
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
320
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
321 paste(seg["chrom"], seg["start"], ".", "N", alt_allele, ".", "PASS", info, sep = "\t")
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
322 })
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
323
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
324 writeLines(c(vcf_header, vcf_body), con = args$output_vcf)
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
325 } else {
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
326 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy)
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
327 writeLines(vcf_header, con = args$output_vcf)
3f62267c4be7 planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents: 3
diff changeset
328 }
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
329 }
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
330
3
d1914f4d9daf planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents: 2
diff changeset
331 # --- Execution Block: when not interactive, parse arguments with `parser` and call main() ---
2
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
332 if (!interactive()) { # guard: body is skipped in an interactive R session
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
333 args <- parser$parse_args() # `parser` is defined earlier in the file
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
334 main(args) # hand the parsed arguments to the analysis entry point
66a56502199d planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff changeset
335 }