comparison facets_analysis.xml @ 7:86bcdc94b008 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 2da49e9385ddce5c74e077c81a52ff1ea4131b81
author artbio
date Wed, 08 Oct 2025 17:41:18 +0000
parents 625038b7d764
children
comparison
equal deleted inserted replaced
6:625038b7d764 7:86bcdc94b008
21 #if $merging.merge_select == "yes": 21 #if $merging.merge_select == "yes":
22 --enable_merging 22 --enable_merging
23 --merge_gap_abs $merging.max_gap_abs 23 --merge_gap_abs $merging.max_gap_abs
24 --merge_gap_rel $merging.max_gap_rel 24 --merge_gap_rel $merging.max_gap_rel
25 #end if 25 #end if
26 --vcf_min_nhet $filtering.vcf_min_nhet
27 --vcf_min_num_mark $filtering.vcf_min_num_mark
26 ]]></command> 28 ]]></command>
27 <inputs> 29 <inputs>
28 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/> 30 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/>
29 31
30 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)" 32 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)"
48 <when value="yes"> 50 <when value="yes">
49 <param name="max_gap_abs" type="integer" value="1000000" label="Absolute maximum gap to merge (bp)" help="Maximum distance in base pairs allowed between two segments to consider them for merging."/> 51 <param name="max_gap_abs" type="integer" value="1000000" label="Absolute maximum gap to merge (bp)" help="Maximum distance in base pairs allowed between two segments to consider them for merging."/>
50 <param name="max_gap_rel" type="float" value="0.5" label="Relative maximum gap to merge (fraction)" help="Maximum relative distance, as a fraction of the average size of the two segments."/> 52 <param name="max_gap_rel" type="float" value="0.5" label="Relative maximum gap to merge (fraction)" help="Maximum relative distance, as a fraction of the average size of the two segments."/>
51 </when> 53 </when>
52 </conditional> 54 </conditional>
55 <section name="filtering" title="VCF Output Filtering" expanded="false">
56 <param name="vcf_min_nhet" type="integer" value="2" label="Minimum heterozygous SNPs for VCF output" help="Post-filter to remove final segments with fewer than this many heterozygous SNPs."/>
57 <param name="vcf_min_num_mark" type="integer" value="3" label="Minimum total markers for VCF output" help="Post-filter to remove final segments with fewer than this many total markers (SNPs). Helps remove SVLEN=0 artifacts."/>
58 </section>
53 </inputs> 59 </inputs>
54 <outputs> 60 <outputs>
55 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/> 61 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/>
56 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/> 62 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/>
57 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/> 63 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/>
67 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/> 73 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/>
68 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/> 74 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/>
69 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" /> 75 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" />
70 </test> 76 </test>
71 </tests> 77 </tests>
72 <help><![CDATA[ 78 <help><![CDATA[
73 **What it does** 79 **What it does**
74 80
75 This tool runs the `FACETS` R package to perform allele-specific copy number 81 This tool runs the `FACETS` R package to perform allele-specific copy number
76 and clonal heterogeneity analysis. It takes the compressed pileup file 82 and clonal heterogeneity analysis. It takes the compressed pileup file
77 generated by the "SNP Pileup for FACETS" tool as its primary input and 83 generated by the "SNP Pileup for FACETS" tool as its primary input and
78 produces a set of standard FACETS outputs, including segmentation calls, 84 produces a set of standard FACETS outputs.
79 purity/ploidy estimates, plots, and a VCF file summarizing the CNV events.
80 85
81 --- 86 ---
82 87
83 **Primary Parameters** 88 **Primary Parameters**
84 89
85 These parameters control the core of the FACETS segmentation algorithm. 90 These parameters control the core of the FACETS segmentation algorithm.
86 91
87 - **Critical value for segmentation (cval):** This is the most important 92 - **Critical value for segmentation (cval):** This is the most important
88 parameter for controlling the sensitivity of the segmentation. A *higher* 93 parameter for controlling the sensitivity. A *higher* value (e.g., 200-800)
89 value (e.g., 200-800) will result in fewer segments and is generally 94 results in fewer segments (less sensitive) and is recommended for
90 recommended for high-density data like Whole Genome Sequencing (WGS). 95 high-density data (WGS). A *lower* value (e.g., 50-150) increases
91 A *lower* value (e.g., 50-150) increases sensitivity, resulting in more 96 sensitivity and is more suitable for sparser data (WES).
92 segments, and is more suitable for sparser data like Whole Exome
93 Sequencing (WES).
94 97
95 - **Minimum number of heterozygous SNPs (min.nhet):** This is a quality 98 - **Minimum number of heterozygous SNPs (min.nhet):** This is a quality
96 filter. After segmentation, any segment that is supported by fewer 99 filter. Segments supported by fewer heterozygous SNPs than this
97 heterozygous SNPs than this threshold will be discarded. This helps 100 threshold will be discarded during the initial segmentation pass.
98 to remove unreliable, small segments.
99 101
100 - **SNP neighbourhood size (snp.nbhd):** This parameter defines the genomic 102 - **SNP neighbourhood size (snp.nbhd):** Defines the genomic window (in bp)
101 window (in bp) around a SNP used for local read depth normalization. 103 around a SNP used for local read depth normalization.
102 The default value is generally appropriate.
103 104
104 --- 105 ---
105 106
106 **Advanced VCF Post-processing: Merging Segments** 107 **Advanced VCF Post-processing**
107 108
108 You can optionally enable a post-processing step to merge adjacent CNV 109 You can optionally enable post-processing steps to refine the final VCF.
109 segments in the output VCF.
110 110
111 *Why is this useful?* 111 - **Merging Segments:** This option merges adjacent CNV segments that likely
112 Segmentation algorithms can sometimes split a single, large biological event 112 represent a single biological event, providing a cleaner and more
113 (e.g., a 10 Mb deletion) into several smaller, adjacent segments with the 113 biologically accurate output.
114 same copy number state. This feature attempts to correct this by merging
115 these segments back together, providing a cleaner and more biologically
116 accurate representation of the CNV landscape.
117 114
118 The merging is controlled by an algorithm using two thresholds: 115 - **Filtering Segments:** This option removes low-quality or artefactual
119 116 segments based on the number of SNPs supporting them. This is recommended
120 - **Absolute maximum gap:** The maximum distance in base pairs allowed 117 as FACETS can sometimes report micro-segments that are not biologically
121 between two segments to even consider them for merging. This acts as a 118 relevant.
122 safeguard.
123 - **Relative maximum gap:** The maximum distance allowed, expressed as a
124 *fraction* of the average size of the two segments. This allows large
125 gaps between large segments, but not between small ones, trying to mimic
126 how a human expert would interpret the data.
127 119
128 --- 120 ---
129 121
130 **Outputs** 122 **Outputs**
131 123
132 - **Segmentation file (TSV):** The raw segment data with genomic coordinates 124 - **Segmentation file (TSV):** The raw segment data with genomic coordinates
133 and their associated copy number (TCN, LCN). 125 and their associated copy number (TCN, LCN).
134 - **Summary file:** The main estimated parameters like purity, ploidy, etc. 126 - **Summary file:** The main estimated parameters like purity, ploidy, etc.
127 - **Plots file (PNG):** A genome-wide visualization of the copy number and
128 allelic imbalance results across all chromosomes.
129 - **Spider Plot (PNG):** The most important **diagnostic plot** for assessing
130 the quality of the FACETS fit. See detailed explanation below.
135 - **CNV calls file (VCF):** A summary of the detected copy number events in 131 - **CNV calls file (VCF):** A summary of the detected copy number events in
136 a standard VCF format, suitable for downstream analysis. 132 a standard VCF format for structural variants. The `ALT` column contains
137 - **Plots file (PNG):** An enhanced visualization of the genome-wide results. 133 symbolic alleles (`<DEL>`, `<DUP>`). All FACETS-specific details are in
138 - **Spider Plot (PNG):** This is the most important **diagnostic plot** for 134 the `INFO` field:
139 assessing the quality of the FACETS fit. 135
140 On this plot (generated by the `logRlogORspider` function), each 136 ``SVTYPE``
141 **circle** is a genomic segment from your data. The **curves** (labeled 137 Type of variant (e.g., DEL, DUP).
142 `2-1`, `1-0`, etc.) represent the theoretical positions for integer copy 138 ``EVENT``
143 number states given the estimated purity and ploidy. A high-confidence 139 FACETS classification (e.g., HOMOZYG_DEL, CN_LOH).
144 result is achieved when your data (the circles) align closely with these 140 ``TCN``
145 theoretical curves. For a detailed interpretation, please refer to the 141 Total Copy Number.
146 original FACETS publication: Shen and Seshan, *NAR*, 2016. 142 ``LCN``
147 ]]></help> 143 Lesser Copy Number.
144 ``NUM_MARK``
145 Total number of SNPs in the segment.
146 ``NHET``
147 Number of heterozygous SNPs in the segment.
148
149 **Interpreting the Spider Plot**
150
151 On this plot (generated by the `logRlogORspider` function), each
152 **circle** is a genomic segment from your data. The **curves** (labeled
153 `2-1`, `1-0`, etc.) represent the theoretical positions for integer copy
154 number states. A high-confidence result is achieved when your data (the
155 circles) align closely with these curves. For details, refer to the
156 original FACETS publication: Shen and Seshan, *NAR*, 2016.
157
158 ]]></help>
148 <expand macro="citations"/> 159 <expand macro="citations"/>
149 </tool> 160 </tool>