comparison facets_analysis.xml @ 6:625038b7d764 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
author artbio
date Mon, 06 Oct 2025 15:50:12 +0000
parents 1d56a6b5739f
children 86bcdc94b008
comparison
equal deleted inserted replaced
5:1d56a6b5739f 6:625038b7d764
16 --output_vcf '$output_vcf' 16 --output_vcf '$output_vcf'
17 --cval $cval 17 --cval $cval
18 --min_nhet $min_nhet 18 --min_nhet $min_nhet
19 --snp_nbhd $snp_nbhd 19 --snp_nbhd $snp_nbhd
20 --gbuild '$gbuild' 20 --gbuild '$gbuild'
21 #if $merging.merge_select == "yes":
22 --enable_merging
23 --merge_gap_abs $merging.max_gap_abs
24 --merge_gap_rel $merging.max_gap_rel
25 #end if
21 ]]></command> 26 ]]></command>
22 <inputs> 27 <inputs>
23 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/> 28 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/>
24 29
25 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)" 30 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)"
31 <option value="hg19">Human (hg19)</option> 36 <option value="hg19">Human (hg19)</option>
32 <option value="hg18">Human (hg18)</option> 37 <option value="hg18">Human (hg18)</option>
33 <option value="mm10">Mouse (mm10)</option> 38 <option value="mm10">Mouse (mm10)</option>
34 <option value="mm9">Mouse (mm9)</option> 39 <option value="mm9">Mouse (mm9)</option>
35 </param> 40 </param>
36
37 <param name="snp_nbhd" type="integer" value="300" label="SNP neighborhood size (snp.nbhd)" help="Should match the --pseudo-snps distance used to generate the pileup file. Default is 300."/> 41 <param name="snp_nbhd" type="integer" value="300" label="SNP neighborhood size (snp.nbhd)" help="Should match the --pseudo-snps distance used to generate the pileup file. Default is 300."/>
42 <conditional name="merging">
43 <param name="merge_select" type="select" label="Post-process VCF to merge adjacent segments?" help="Optional step to merge adjacent CNV calls that likely represent a single biological event.">
44 <option value="no" selected="true">No</option>
45 <option value="yes">Yes</option>
46 </param>
47 <when value="no"/>
48 <when value="yes">
49 <param name="max_gap_abs" type="integer" value="1000000" label="Absolute maximum gap to merge (bp)" help="Maximum distance in base pairs allowed between two segments to consider them for merging."/>
50 <param name="max_gap_rel" type="float" value="0.5" label="Relative maximum gap to merge (fraction)" help="Maximum relative distance, as a fraction of the average size of the two segments."/>
51 </when>
52 </conditional>
38 </inputs> 53 </inputs>
39 <outputs> 54 <outputs>
40 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/> 55 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/>
41 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/> 56 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/>
42 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/> 57 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/>
52 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/> 67 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/>
53 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/> 68 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/>
54 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" /> 69 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" />
55 </test> 70 </test>
56 </tests> 71 </tests>
57 <help><![CDATA[ 72 <help><![CDATA[
58 **What it does** 73 **What it does**
59 74
60 This tool runs the `FACETS` R package to perform allele-specific copy number and clonal heterogeneity analysis. It takes the compressed pileup file generated by the "SNP Pileup for FACETS" tool as its primary input. 75 This tool runs the `FACETS` R package to perform allele-specific copy number
76 and clonal heterogeneity analysis. It takes the compressed pileup file
77 generated by the "SNP Pileup for FACETS" tool as its primary input and
78 produces a set of standard FACETS outputs, including segmentation calls,
79 purity/ploidy estimates, plots, and a VCF file summarizing the CNV events.
61 80
62 **Outputs** 81 ---
63 82
64 - A **Segmentation file (TSV)** with the genomic coordinates of each segment and their associated copy number (TCN, LCN). 83 **Primary Parameters**
65 - A **Summary file** with the main estimated parameters (purity, ploidy, etc.). 84
66 - A **CNV calls file (VCF)** listing the detected copy number events in a standard VCF format. 85 These parameters control the core of the FACETS segmentation algorithm.
67 - A **Plots file (PNG)** with an enhanced visualization of the genome-wide results, including a legend for copy number states. 86
68 - A **Spider Plot (PNG)** for diagnosing the quality of the purity/ploidy model fit. 87 - **Critical value for segmentation (cval):** This is the most important
69 ]]></help> 88 parameter for controlling the sensitivity of the segmentation. A *higher*
89 value (e.g., 200-800) will result in fewer segments and is generally
90 recommended for high-density data like Whole Genome Sequencing (WGS).
91 A *lower* value (e.g., 50-150) increases sensitivity, resulting in more
92 segments, and is more suitable for sparser data like Whole Exome
93 Sequencing (WES).
94
95 - **Minimum number of heterozygous SNPs (min.nhet):** This is a quality
96 filter. After segmentation, any segment that is supported by fewer
97 heterozygous SNPs than this threshold will be discarded. This helps
98 to remove unreliable, small segments.
99
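100 - **SNP neighborhood size (snp.nbhd):** This parameter defines the genomic
101 window (in bp) within which only a single SNP locus is kept, which limits
102 serial correlation between nearby loci. It should match the pseudo-SNP spacing used to build the pileup; the default value is generally appropriate.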
103
104 ---
105
106 **Advanced VCF Post-processing: Merging Segments**
107
108 You can optionally enable a post-processing step to merge adjacent CNV
109 segments in the output VCF.
110
111 *Why is this useful?*
112 Segmentation algorithms can sometimes split a single, large biological event
113 (e.g., a 10 Mb deletion) into several smaller, adjacent segments with the
114 same copy number state. This feature attempts to correct this by merging
115 these segments back together, providing a cleaner and more biologically
116 accurate representation of the CNV landscape.
117
118 The merging is controlled by an algorithm using two thresholds:
119
120 - **Absolute maximum gap:** The maximum distance in base pairs allowed
121 between two segments for them to be considered for merging. This acts as a
122 safeguard against merging segments that are separated by a very large gap.
123 - **Relative maximum gap:** The maximum distance allowed, expressed as a
124 *fraction* of the average size of the two segments. This allows large
125 gaps between large segments, but not between small ones, trying to mimic
126 how a human expert would interpret the data.
127
128 ---
129
130 **Outputs**
131
132 - **Segmentation file (TSV):** The raw segment data with genomic coordinates
133 and their associated copy number (TCN, LCN).
134 - **Summary file:** The main estimated parameters like purity, ploidy, etc.
135 - **CNV calls file (VCF):** A summary of the detected copy number events in
136 a standard VCF format, suitable for downstream analysis.
137 - **Plots file (PNG):** An enhanced visualization of the genome-wide results.
138 - **Spider Plot (PNG):** This is the most important **diagnostic plot** for
139 assessing the quality of the FACETS fit.
140 On this plot (generated by the `logRlogORspider` function), each
141 **circle** is a genomic segment from your data. The **curves** (labeled
142 `2-1`, `1-0`, etc.) represent the theoretical positions for integer copy
143 number states given the estimated purity and ploidy. A high-confidence
144 result is achieved when your data (the circles) align closely with these
145 theoretical curves. For a detailed interpretation, please refer to the
146 original FACETS publication: Shen and Seshan, *Nucleic Acids Research*, 2016.
147 ]]></help>
70 <expand macro="citations"/> 148 <expand macro="citations"/>
71 </tool> 149 </tool>