Mercurial > repos > artbio > cnv_facets
annotate facets_analysis.R @ 8:e8a8a4910e32 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2a0f9aee1c61e12ab9f0e25a6ba7db5c08b67fe6
| author | artbio |
|---|---|
| date | Thu, 09 Oct 2025 17:14:30 +0000 |
| parents | 86bcdc94b008 |
| children |
| rev | line source |
|---|---|
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env Rscript |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
2 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
3 # Description: |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
4 # This script serves as the backend for the Galaxy FACETS Analysis tool. |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
5 # It takes a SNP pileup file as input and performs allele-specific copy |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
6 # number analysis using the R package 'facets'. |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
7 # ============================================================================== |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
8 |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
9 # --- Load Libraries --- |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
10 suppressPackageStartupMessages(library(argparse)) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
11 suppressPackageStartupMessages(library(facets)) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
12 |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
# --- Source the external plot_facets_enhanced function ---
# Rscript exposes the path of the running script as a `--file=<path>` entry
# in the full (non-trailing) argument vector. The helper file is expected to
# sit in the same directory as this script and is sourced relative to it.
initial_opts <- commandArgs(trailingOnly = FALSE)

# Anchor the pattern so that only the genuine `--file=` option is picked up,
# not an argument value that merely contains that text.
file_opt <- grep("^--file=", initial_opts, value = TRUE)
if (length(file_opt) == 0L) {
    # Without `--file=` the helper cannot be located; fail with a clear
    # message instead of an obscure error raised inside source().
    stop(
        "Cannot determine the script directory: no '--file=' argument found. ",
        "Run this script with Rscript.",
        call. = FALSE
    )
}

# Use the first match only so that script_path is always a single string.
script_path <- dirname(sub("^--file=", "", file_opt[1L]))
source(file.path(script_path, "plot_facets_enhanced-v22.R"))
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
19 |
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
# --- Command-line interface ---

# Parser object collecting every option understood by this script.
parser <- ArgumentParser(
    description = "Run FACETS algorithm on a SNP pileup file."
)

# Input file and sample label (both mandatory).
parser$add_argument(
    "--pileup",
    type = "character",
    required = TRUE,
    help = "Path to the gzipped pileup CSV file."
)
parser$add_argument(
    "--sample_id",
    type = "character",
    required = TRUE,
    help = "Sample ID used for plot titles and metadata."
)

# Output destinations (all mandatory).
parser$add_argument(
    "--output_seg",
    type = "character",
    required = TRUE,
    help = "Path for the output segmentation file (TSV)."
)
parser$add_argument(
    "--output_summary",
    type = "character",
    required = TRUE,
    help = "Path for the output summary file (TSV)."
)
parser$add_argument(
    "--output_plots",
    type = "character",
    required = TRUE,
    help = "Path for the main output plots file (PNG)."
)
parser$add_argument(
    "--output_spider",
    type = "character",
    required = TRUE,
    help = "Path for the diagnostic spider plot file (PNG)."
)
parser$add_argument(
    "--output_vcf",
    type = "character",
    required = TRUE,
    help = "Path for the output VCF file with CNV calls."
)

# FACETS tuning parameters (optional, with defaults).
parser$add_argument(
    "--cval",
    type = "double",
    default = 150,
    help = "Critical value for segmentation."
)
parser$add_argument(
    "--min_nhet",
    type = "integer",
    default = 25,
    help = "Minimum number of heterozygous SNPs per segment."
)
parser$add_argument(
    "--snp_nbhd",
    type = "integer",
    default = 300,
    help = "SNP neighborhood size for pre-processing. Crucial for sparse VCFs."
)
parser$add_argument(
    "--gbuild",
    type = "character",
    default = "hg38",
    choices = c("hg19", "hg38", "hg18", "mm9", "mm10"),
    help = "Genome build used for alignment."
)

# Optional post-processing: merging of neighbouring segments.
parser$add_argument(
    "--enable_merging",
    action = "store_true",
    default = FALSE,
    help = "If specified, enables the post-processing step to merge adjacent and similar CNV segments."
)
parser$add_argument(
    "--merge_gap_abs",
    type = "integer",
    default = 1000000,
    help = "Absolute maximum gap in bp to merge adjacent CNV segments."
)
parser$add_argument(
    "--merge_gap_rel",
    type = "double",
    default = 0.5,
    help = "Relative maximum gap (fraction of avg. segment length) to merge segments."
)

# Thresholds applied when filtering segments for the VCF output.
parser$add_argument(
    "--vcf_min_nhet",
    type = "integer",
    default = 2,
    help = "VCF Post-Filter: Minimum number of heterozygous SNPs for a segment to be kept."
)
parser$add_argument(
    "--vcf_min_num_mark",
    type = "integer",
    default = 3,
    help = "VCF Post-Filter: Minimum number of total markers for a segment to be kept."
)
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
#' Classify CNV segments based on TCN/LCN
#'
#' Adds a character column `sv_type` derived from the total copy number
#' (`tcn.em`) and the lesser copy number (`lcn.em`) of each segment. Rules are
#' applied in order and a label is never overwritten once set:
#'
#' * `NEUTR`       - tcn == 2 and lcn is 1 or missing
#' * `DUP`         - tcn > 2
#' * `HEMIZYG_DEL` - 0 < tcn < 2 with a known lcn (a single copy remains)
#' * `HOMOZYG_DEL` - tcn <= 0 with a known lcn (no copy remains)
#' * `CN_LOH`      - tcn == 2 and lcn == 0
#' * `DEL`         - any remaining segment with tcn < 2 (lcn is missing)
#'
#' Deletions are split on the total copy number rather than on `lcn.em`:
#' the lesser copy number is presumably never above 0 when fewer than two
#' copies remain, so a test on `lcn.em > 0` would never select a hemizygous
#' loss and every deletion would end up labelled homozygous.
#'
#' @param cncf_df A data frame with one row per segment and the numeric
#'   columns `tcn.em` and `lcn.em`. Zero-row input is accepted.
#' @return `cncf_df` with the extra column `sv_type`. Segments matching none
#'   of the rules (for example a missing `tcn.em`) keep `NA`.
classify_cnv <- function(cncf_df) {
    tcn <- cncf_df$tcn.em
    lcn <- cncf_df$lcn.em
    lcn_known <- !is.na(lcn)

    # Build the column at full length so that a zero-row data frame works
    # (assigning a length-one value to a column of an empty frame errors).
    sv_type <- rep(NA_character_, nrow(cncf_df))

    # which() discards NA conditions, so segments with missing copy numbers
    # are simply left unlabelled by the rules that cannot be evaluated.
    sv_type[which(tcn == 2 & (lcn == 1 | !lcn_known))] <- "NEUTR"
    sv_type[which(is.na(sv_type) & tcn > 2)] <- "DUP"
    sv_type[which(is.na(sv_type) & tcn < 2 & tcn > 0 & lcn_known)] <- "HEMIZYG_DEL"
    sv_type[which(is.na(sv_type) & tcn <= 0 & lcn_known)] <- "HOMOZYG_DEL"
    sv_type[which(is.na(sv_type) & tcn == 2 & lcn_known & lcn == 0)] <- "CN_LOH"

    # Replace the remaining NAs (tcn.em < 2 but lcn.em is NA) with a generic
    # deletion type.
    sv_type[which(is.na(sv_type) & tcn < 2)] <- "DEL"

    cncf_df$sv_type <- sv_type
    cncf_df
}
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
106 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
#' Create a VCF header (explicit version)
#'
#' Builds the meta-information lines and the column header line of a
#' VCFv4.2 file describing FACETS copy-number segments.
#'
#' @param sample_id Sample identifier. Not used in the header at present (the
#'   column line carries no per-sample column); kept so that existing calls
#'   remain valid.
#' @param purity Numeric purity estimate, written rounded to 4 decimals as
#'   `##FACETS_PURITY`.
#' @param ploidy Numeric ploidy estimate, written rounded to 4 decimals as
#'   `##FACETS_PLOIDY`.
#' @return A character vector with one element per header line; the last
#'   element is the tab-separated `#CHROM ... INFO` column line.
create_vcf_header <- function(sample_id, purity, ploidy) {
    # Every INFO field declared here is single-valued (Number=1).
    info_line <- function(id, type, description) {
        paste0(
            "##INFO=<ID=", id, ",Number=1,Type=", type,
            ",Description=\"", description, "\">"
        )
    }

    c(
        "##fileformat=VCFv4.2",
        paste0("##fileDate=", format(Sys.Date(), "%Y%m%d")),
        paste0("##source=FACETS_v", packageVersion("facets")),
        info_line("END", "Integer", "End position of the variant"),
        info_line(
            "SVTYPE", "String",
            "Type of structural variant (standard VCF tags: DEL, DUP, CNV)"
        ),
        info_line("SVLEN", "Integer", "Length of the SV"),
        # The list of values includes the generic DEL label that
        # classify_cnv() assigns when the lesser copy number is missing.
        # NOTE(review): assumes EVENT is filled from the sv_type column -
        # confirm against the code that writes the VCF records.
        info_line(
            "EVENT", "String",
            "FACETS event classification. Possible values: DUP, HEMIZYG_DEL, HOMOZYG_DEL, CN_LOH, DEL"
        ),
        info_line("TCN", "Integer", "Total Copy Number (EM fit)"),
        info_line("LCN", "Integer", "Lesser Copy Number (EM fit)"),
        info_line("NUM_MARK", "Integer", "Number of SNPs in the segment"),
        info_line(
            "NHET", "Integer",
            "Number of heterozygous SNPs in the segment"
        ),
        paste0("##FACETS_PURITY=", round(purity, 4)),
        paste0("##FACETS_PLOIDY=", round(ploidy, 4)),
        "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
    )
}
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
129 |
|
6
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
130 # ============================================================================== |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
131 # Function to merge adjacent and similar CNV segments |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
132 # |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
133 # This function implements a merging algorithm that reflects human judgment |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
134 # by using a hybrid proximity condition. Two segments are merged |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
135 # if they have the same CNV state and are close to each other, both in absolute |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
136 # and relative terms. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
137 # |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
138 # @param cnv_df A data frame of CNV calls, expected to have columns like |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
139 # 'chrom', 'start', 'end', 'svtype', 'tcn.em', 'lcn.em', etc. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
140 # @param max_gap_abs An integer. The absolute maximum distance (in bp) allowed |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
141 # between two segments to consider them for merging. Acts as a safeguard. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
142 # @param max_gap_rel A numeric value (0 to 1). The maximum relative distance, |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
143 # expressed as a fraction of the average length of the two adjacent segments. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
144 # @return A new data frame with the similar adjacent segments merged. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
145 # ============================================================================== |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
146 merge_segments <- function(cnv_df, max_gap_abs = 1000000, max_gap_rel = 0.5) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
147 # If there's nothing to merge, return the original data frame |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
148 if (nrow(cnv_df) < 2) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
149 return(cnv_df) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
150 } |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
151 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
152 # Ensure the data frame is sorted by genomic position |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
153 cnv_df <- cnv_df[order(cnv_df$chrom, cnv_df$start), ] |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
154 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
155 merged_rows <- list() |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
156 current_row <- cnv_df[1, ] |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
157 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
158 for (i in 2:nrow(cnv_df)) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
159 next_row <- cnv_df[i, ] |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
160 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
161 # Basic criteria: segments must be of the same type and CN state |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
162 same_chrom <- current_row$chrom == next_row$chrom |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
163 same_svtype <- current_row$svtype == next_row$svtype |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
164 same_event <- current_row$event == next_row$event |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
165 same_tcn <- current_row$tcn.em == next_row$tcn.em |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
166 same_lcn <- identical(current_row$lcn.em, next_row$lcn.em) # Handles NA safely |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
167 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
168 # If basic criteria are met, evaluate proximity |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
169 if (same_chrom && same_svtype && same_event && same_tcn && same_lcn) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
170 gap <- next_row$start - current_row$end |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
171 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
172 # Calculate the relative threshold based on the average size of the two segments |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
173 len_a <- current_row$end - current_row$start |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
174 len_b <- next_row$end - next_row$start |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
175 relative_threshold <- ((len_a + len_b) / 2) * max_gap_rel |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
176 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
177 # Hybrid merge condition: gap must be below BOTH absolute and relative thresholds |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
178 if (gap >= 0 && gap <= max_gap_abs && gap <= relative_threshold) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
179 # Merge: update the end of the current segment and aggregate numeric fields |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
180 current_row$end <- next_row$end |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
181 current_row$num.mark <- current_row$num.mark + next_row$num.mark |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
182 current_row$nhet <- current_row$nhet + next_row$nhet |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
183 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
184 # Skip to the next iteration to try merging the newly expanded segment |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
185 # with the one that follows. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
186 next |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
187 } |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
188 } |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
189 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
190 # If no merge occurred, the current segment is final. Save it. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
191 merged_rows <- append(merged_rows, list(current_row)) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
192 # The next segment becomes the new current segment. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
193 current_row <- next_row |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
194 } |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
195 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
196 # Append the very last segment (which is either a standalone or the result of the last merge) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
197 merged_rows <- append(merged_rows, list(current_row)) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
198 |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
199 # Reconstruct a single data frame from the list of merged rows |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
200 do.call(rbind, merged_rows) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
201 } |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
202 # --- Main Analysis Function --- |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
203 main <- function(args) { |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
204 # Set seed for reproducibility |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
205 set.seed(1965) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
206 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
207 # --- Read the data with readSnpMatrix() from facets --- |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
208 rcmat <- readSnpMatrix(args$pileup) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
209 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
210 # --- Pre-process sample --- |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
211 xx <- preProcSample(rcmat, gbuild = args$gbuild, snp.nbhd = args$snp_nbhd) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
212 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
213 # --- Process sample (segmentation) --- |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
214 oo <- procSample(xx, cval = args$cval, min.nhet = args$min_nhet) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
215 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
216 # --- Estimate ploidy/purity --- |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
217 fit <- emcncf(oo) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
218 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
219 # Write the main segmentation file |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
220 cncf_output <- fit$cncf |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
221 if (nrow(cncf_output) > 0) { |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
222 cncf_output$purity <- fit$purity |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
223 cncf_output$ploidy <- fit$ploidy |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
224 # Reorder columns to have purity/ploidy first for clarity |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
225 cncf_output <- cncf_output[, c("purity", "ploidy", setdiff(names(cncf_output), c("purity", "ploidy")))] |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
226 } |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
227 write.table(cncf_output, file = args$output_seg, sep = "\t", quote = FALSE, row.names = FALSE) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
228 |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
229 # Write a key-value summary file |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
230 # A NULL value is replaced by NA to preserve vector length. |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
231 summary_df <- data.frame( |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
232 Parameter = c( |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
233 "sample_id", "purity", "ploidy", "dipLogR", "loglik", |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
234 "cval_param", "min_nhet_param", "snp_nbhd_param", "gbuild_param" |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
235 ), |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
236 Value = c( |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
237 args$sample_id, |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
238 ifelse(is.null(fit$purity), NA, fit$purity), |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
239 ifelse(is.null(fit$ploidy), NA, fit$ploidy), |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
240 ifelse(is.null(fit$dipLogR), NA, fit$dipLogR), |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
241 ifelse(is.null(fit$loglik), NA, fit$loglik), |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
242 args$cval, |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
243 args$min_nhet, |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
244 args$snp_nbhd, |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
245 args$gbuild |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
246 ) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
247 ) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
248 write.table(summary_df, file = args$output_summary, sep = "\t", quote = FALSE, row.names = FALSE) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
249 |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
250 # Generate the plots PNG |
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
251 png(file = args$output_plots, width = 12, height = 8, units = "in", res = 300) |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
252 plotSample(x = oo, emfit = fit, sname = args$sample_id) |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
253 plot_facets_enhanced(oo, emfit = fit, plot.type = "em", sname = args$sample_id) |
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
254 dev.off() |
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
255 png(file = args$output_spider, width = 8, height = 8, units = "in", res = 300) |
|
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
256 logRlogORspider(oo$out, oo$dipLogR) |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
257 dev.off() |
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
258 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
259 # --- Generate VCF file --- |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
260 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
261 # Classify segments and define standard SVTYPEs + detailed EVENTs |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
262 cncf_for_vcf <- fit$cncf |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
263 if (nrow(cncf_for_vcf) > 0) { |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
264 cncf_for_vcf$svtype <- NA_character_ |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
265 cncf_for_vcf$event <- NA_character_ |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
266 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
267 # Duplications |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
268 cncf_for_vcf[cncf_for_vcf$tcn.em > 2, c("svtype", "event")] <- c("DUP", "DUP") |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
269 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
270 # Deletions |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
271 cncf_for_vcf[cncf_for_vcf$tcn.em < 2, c("svtype")] <- "DEL" |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
272 cncf_for_vcf[cncf_for_vcf$tcn.em == 1, c("event")] <- "HEMIZYG_DEL" |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
273 cncf_for_vcf[cncf_for_vcf$tcn.em == 0, c("event")] <- "HOMOZYG_DEL" |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
274 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
275 # Copy-Neutral LOH |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
276 cncf_for_vcf[cncf_for_vcf$tcn.em == 2 & !is.na(cncf_for_vcf$lcn.em) & cncf_for_vcf$lcn.em == 0, c("svtype", "event")] <- c("CNV", "CN_LOH") |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
277 |
|
6
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
278 # Filter normal segments (where 'svtype' is still NA) |
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
279 cnv_calls <- cncf_for_vcf[!is.na(cncf_for_vcf$svtype), ] |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
280 } else { |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
281 cnv_calls <- data.frame() |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
282 } |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
283 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
284 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
285 if (nrow(cnv_calls) > 0) { |
|
6
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
286 if (args$enable_merging) { |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
287 cnv_calls <- merge_segments( |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
288 cnv_df = cnv_calls, |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
289 max_gap_abs = args$merge_gap_abs, |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
290 max_gap_rel = args$merge_gap_rel |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
291 ) |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
292 } |
|
7
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
293 # Apply VCF post-filters to remove low-quality/artefactual segments |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
294 # This addresses the issue of FACETS' EM algorithm sometimes creating |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
295 # micro-segments that bypass the initial min.nhet segmentation parameter. |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
296 original_rows <- nrow(cnv_calls) |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
297 cnv_calls <- cnv_calls[ |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
298 cnv_calls$nhet >= args$vcf_min_nhet & |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
299 cnv_calls$num.mark >= args$vcf_min_num_mark, |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
300 ] |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
301 cat(paste("Applied VCF post-filters: kept", nrow(cnv_calls), "of", original_rows, "segments.\n")) |
|
86bcdc94b008
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
artbio
parents:
6
diff
changeset
|
302 |
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
303 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy) |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
304 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
305 vcf_body <- apply(cnv_calls, 1, function(seg) { |
|
6
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
306 cnv_calls <- merge_segments(cnv_calls) |
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
307 alt_allele <- paste0("<", seg["svtype"], ">") |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
308 info <- paste0( |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
309 "END=", seg["end"], |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
310 ";SVTYPE=", seg["svtype"], |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
311 ";SVLEN=", as.integer(seg["end"]) - as.integer(seg["start"]), |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
312 ";TCN=", seg["tcn.em"], |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
313 ";LCN=", ifelse(is.na(seg["lcn.em"]), ".", seg["lcn.em"]), |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
314 ";EVENT=", seg["event"], |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
315 ";NUM_MARK=", seg["num.mark"], |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
316 ";NHET=", seg["nhet"] |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
317 ) |
|
6
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
318 # Remove any space(s) immediately following an '=' sign in the INFO string. |
|
625038b7d764
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
artbio
parents:
4
diff
changeset
|
319 info <- gsub("=\\s+", "=", info) |
|
4
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
320 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
321 paste(seg["chrom"], seg["start"], ".", "N", alt_allele, ".", "PASS", info, sep = "\t") |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
322 }) |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
323 |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
324 writeLines(c(vcf_header, vcf_body), con = args$output_vcf) |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
325 } else { |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
326 vcf_header <- create_vcf_header(args$sample_id, fit$purity, fit$ploidy) |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
327 writeLines(vcf_header, con = args$output_vcf) |
|
3f62267c4be7
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit e47e0ed100904318ef4aae661b763e049c358edf
artbio
parents:
3
diff
changeset
|
328 } |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
329 } |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
330 |
|
3
d1914f4d9daf
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 64ac36125f04497dd51028f307e059fca9ec0503
artbio
parents:
2
diff
changeset
|
331 # --- Execution Block --- |
|
2
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
332 if (!interactive()) { |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
333 args <- parser$parse_args() |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
334 main(args) |
|
66a56502199d
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 0176d2cc4f1caf0ab948ef72efb25ccce735461e
artbio
parents:
diff
changeset
|
335 } |
