Mercurial > repos > artbio > cnv_facets
comparison facets_analysis.xml @ 6:625038b7d764 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/facets commit 8cced47697e5777fd60dacc60300e770bd409e9d
| author | artbio |
|---|---|
| date | Mon, 06 Oct 2025 15:50:12 +0000 |
| parents | 1d56a6b5739f |
| children | 86bcdc94b008 |
comparison
equal
deleted
inserted
replaced
| 5:1d56a6b5739f | 6:625038b7d764 |
|---|---|
| 16 --output_vcf '$output_vcf' | 16 --output_vcf '$output_vcf' |
| 17 --cval $cval | 17 --cval $cval |
| 18 --min_nhet $min_nhet | 18 --min_nhet $min_nhet |
| 19 --snp_nbhd $snp_nbhd | 19 --snp_nbhd $snp_nbhd |
| 20 --gbuild '$gbuild' | 20 --gbuild '$gbuild' |
| 21 #if $merging.merge_select == "yes": | |
| 22 --enable_merging | |
| 23 --merge_gap_abs $merging.max_gap_abs | |
| 24 --merge_gap_rel $merging.max_gap_rel | |
| 25 #end if | |
| 21 ]]></command> | 26 ]]></command> |
| 22 <inputs> | 27 <inputs> |
| 23 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/> | 28 <param name="pileup" type="data" format="tabular.gz" label="FACETS Pileup File" help="Output from the 'SNP Pileup for FACETS' tool."/> |
| 24 | 29 |
| 25 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)" | 30 <param name="cval" type="float" value="150" label="Critical value for segmentation (cval)" |
| 31 <option value="hg19">Human (hg19)</option> | 36 <option value="hg19">Human (hg19)</option> |
| 32 <option value="hg18">Human (hg18)</option> | 37 <option value="hg18">Human (hg18)</option> |
| 33 <option value="mm10">Mouse (mm10)</option> | 38 <option value="mm10">Mouse (mm10)</option> |
| 34 <option value="mm9">Mouse (mm9)</option> | 39 <option value="mm9">Mouse (mm9)</option> |
| 35 </param> | 40 </param> |
| 36 | |
| 37 <param name="snp_nbhd" type="integer" value="300" label="SNP neighborhood size (snp.nbhd)" help="Should match the --pseudo-snps distance used to generate the pileup file. Default is 300."/> | 41 <param name="snp_nbhd" type="integer" value="300" label="SNP neighborhood size (snp.nbhd)" help="Should match the --pseudo-snps distance used to generate the pileup file. Default is 300."/> |
| 42 <conditional name="merging"> | |
| 43 <param name="merge_select" type="select" label="Post-process VCF to merge adjacent segments?" help="Optional step to merge adjacent CNV calls that likely represent a single biological event."> | |
| 44 <option value="no" selected="true">No</option> | |
| 45 <option value="yes">Yes</option> | |
| 46 </param> | |
| 47 <when value="no"/> | |
| 48 <when value="yes"> | |
| 49 <param name="max_gap_abs" type="integer" value="1000000" label="Absolute maximum gap to merge (bp)" help="Maximum distance in base pairs allowed between two segments to consider them for merging."/> | |
| 50 <param name="max_gap_rel" type="float" value="0.5" label="Relative maximum gap to merge (fraction)" help="Maximum relative distance, as a fraction of the average size of the two segments."/> | |
| 51 </when> | |
| 52 </conditional> | |
| 38 </inputs> | 53 </inputs> |
| 39 <outputs> | 54 <outputs> |
| 40 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/> | 55 <data name="output_seg" format="tsv" label="FACETS Segmentation on ${on_string}"/> |
| 41 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/> | 56 <data name="output_summary" format="tabular" label="FACETS Summary on ${on_string}"/> |
| 42 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/> | 57 <data name="output_plots" format="png" label="FACETS Plots on ${on_string}"/> |
| 52 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/> | 67 <output name="output_plots" file="test_sample_01.plots.png" ftype="png" compare="sim_size" delta="20000"/> |
| 53 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/> | 68 <output name="output_spider" file="test_sample_01.spider.png" ftype="png" compare="sim_size" delta="10000"/> |
| 54 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" /> | 69 <output name="output_vcf" file="test_sample_01.cnv.vcf" ftype="vcf" lines_diff="2" /> |
| 55 </test> | 70 </test> |
| 56 </tests> | 71 </tests> |
| 57 <help><![CDATA[ | 72 <help><![CDATA[ |
| 58 **What it does** | 73 **What it does** |
| 59 | 74 |
| 60 This tool runs the `FACETS` R package to perform allele-specific copy number and clonal heterogeneity analysis. It takes the compressed pileup file generated by the "SNP Pileup for FACETS" tool as its primary input. | 75 This tool runs the `FACETS` R package to perform allele-specific copy number |
| 76 and clonal heterogeneity analysis. It takes the compressed pileup file | |
| 77 generated by the "SNP Pileup for FACETS" tool as its primary input and | |
| 78 produces a set of standard FACETS outputs, including segmentation calls, | |
| 79 purity/ploidy estimates, plots, and a VCF file summarizing the CNV events. | |
| 61 | 80 |
| 62 **Outputs** | 81 --- |
| 63 | 82 |
| 64 - A **Segmentation file (TSV)** with the genomic coordinates of each segment and their associated copy number (TCN, LCN). | 83 **Primary Parameters** |
| 65 - A **Summary file** with the main estimated parameters (purity, ploidy, etc.). | 84 |
| 66 - A **CNV calls file (VCF)** listing the detected copy number events in a standard VCF format. | 85 These parameters control the core of the FACETS segmentation algorithm. |
| 67 - A **Plots file (PNG)** with an enhanced visualization of the genome-wide results, including a legend for copy number states. | 86 |
| 68 - A **Spider Plot (PNG)** for diagnosing the quality of the purity/ploidy model fit. | 87 - **Critical value for segmentation (cval):** This is the most important |
| 69 ]]></help> | 88 parameter for controlling the sensitivity of the segmentation. A *higher* |
| 89 value (e.g., 200-800) will result in fewer segments and is generally | |
| 90 recommended for high-density data like Whole Genome Sequencing (WGS). | |
| 91 A *lower* value (e.g., 50-150) increases sensitivity, resulting in more | |
| 92 segments, and is more suitable for sparser data like Whole Exome | |
| 93 Sequencing (WES). | |
| 94 | |
| 95 - **Minimum number of heterozygous SNPs (min.nhet):** This is a quality | |
| 96 filter. After segmentation, any segment that is supported by fewer | |
| 97 heterozygous SNPs than this threshold will be discarded. This helps | |
| 98 to remove unreliable, small segments. | |
| 99 | |
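| 100 - **SNP neighborhood size (snp.nbhd):** This parameter defines the genomic | |
| 101 window (in bp) within which only a single SNP locus is retained, which reduces serial correlation between densely clustered SNPs. It should match the --pseudo-snps distance used to generate the pileup file. | |
| 102 The default value is generally appropriate. | |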
| 103 | |
| 104 --- | |
| 105 | |
| 106 **Advanced VCF Post-processing: Merging Segments** | |
| 107 | |
| 108 You can optionally enable a post-processing step to merge adjacent CNV | |
| 109 segments in the output VCF. | |
| 110 | |
| 111 *Why is this useful?* | |
| 112 Segmentation algorithms can sometimes split a single, large biological event | |
| 113 (e.g., a 10 Mb deletion) into several smaller, adjacent segments with the | |
| 114 same copy number state. This feature attempts to correct this by merging | |
| 115 these segments back together, providing a cleaner and more biologically | |
| 116 accurate representation of the CNV landscape. | |
| 117 | |
| 118 Two thresholds control which adjacent segments are merged: | |
| 119 | |
| 120 - **Absolute maximum gap:** The maximum distance in base pairs allowed | |
| 121 between two segments for them to be considered for merging. This acts as a | |
| 122 hard upper limit: segments separated by a larger gap are never merged, however large they are. | |
| 123 - **Relative maximum gap:** The maximum distance allowed, expressed as a | |
| 124 *fraction* of the average size of the two segments. This allows large | |
| 125 gaps between large segments, but not between small ones, trying to mimic | |
| 126 how a human expert would interpret the data. | |
| 127 | |
| 128 --- | |
| 129 | |
| 130 **Outputs** | |
| 131 | |
| 132 - **Segmentation file (TSV):** The raw segment data with genomic coordinates | |
| 133 and their associated copy number (TCN, LCN). | |
| 134 - **Summary file:** The main estimated parameters like purity, ploidy, etc. | |
| 135 - **CNV calls file (VCF):** A summary of the detected copy number events in | |
| 136 a standard VCF format, suitable for downstream analysis. | |
| 137 - **Plots file (PNG):** An enhanced visualization of the genome-wide results. | |
| 138 - **Spider Plot (PNG):** This is the most important **diagnostic plot** for | |
| 139 assessing the quality of the FACETS fit. | |
| 140 On this plot (generated by the `logRlogORspider` function), each | |
| 141 **circle** is a genomic segment from your data. The **curves** (labeled | |
| 142 `2-1`, `1-0`, etc.) represent the theoretical positions for integer copy | |
| 143 number states given the estimated purity and ploidy. A high-confidence | |
| 144 result is achieved when your data (the circles) align closely with these | |
| 145 theoretical curves. For a detailed interpretation, please refer to the | |
| 146 original FACETS publication: Shen and Seshan, *NAR*, 2016. | |
| 147 ]]></help> | |
| 70 <expand macro="citations"/> | 148 <expand macro="citations"/> |
| 71 </tool> | 149 </tool> |
