changeset 5:aa3fa1c29ccf draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/cnvkit commit fc1282ec68b346988203ead860e9b9d6a47e9efb
author iuc
date Sat, 01 Mar 2025 12:00:32 +0000
parents b02eb4c918f2
children
files fix.xml macros.xml test-data/sample.cnv.vcf
diffstat 3 files changed, 44 insertions(+), 44 deletions(-) [+]
line wrap: on
line diff
--- a/fix.xml	Mon Jan 20 16:38:49 2025 +0000
+++ b/fix.xml	Sat Mar 01 12:00:32 2025 +0000
@@ -32,23 +32,23 @@
             $no_rmask
     ]]></command>
     <inputs>
-        <param name="input_target_file" type="data" format="bam" label="Sample Target coverage cnn file" help="" />
-        <param name="input_antitarget_file" type="data" optional="true" format="tabular" label="Sample Antitarget coverage cnn file" help="" />
-        <param name="input_reference_coverage_file" type="data" optional="true" format="tabular" label="Reference coverage cnn file" help="TAS-on-target coverage cnn file is also accepted" />
+        <param name="input_target_file" type="data" format="cnn" label="Sample Target coverage cnn file" help="" />
+        <param name="input_antitarget_file" type="data" optional="true" format="cnn" label="Sample Antitarget coverage cnn file" help="" />
+        <param name="input_reference_coverage_file" type="data" optional="true" format="cnn" label="Reference coverage cnn file" help="TAS-on-target coverage cnn file is also accepted" />
         <param argument="--cluster" type="boolean" checked="false" truevalue="--cluster" falsevalue="" label="Compare and use cluster-specific values present in the reference profile" help="" />
-        <param argument="--sample-id" optional="true" type="text" label="Sample ID for target/antitarget files" value="" help="Otherwise inferred from file names" />
+        <param argument="--sample-id" type="text" label="Sample ID for target/antitarget files" help="Otherwise inferred from file names" />
         <param argument="--no-gc" type="boolean" checked="false" truevalue="--no-gc" falsevalue="" label="Skip GC correction" help="" />
         <param argument="--no-edge" type="boolean" checked="false" truevalue="--no-edge" falsevalue="" label="Skip edge-effect correction" help="" />
         <param argument="--no-rmask" type="boolean" checked="false" truevalue="--no-rmask" falsevalue="" label=" Skip RepeatMasker correction" help="" />
     </inputs>
     <outputs>
-        <data name="out_sample_cnr" format="tabular" label="${tool.name} on ${on_string}: Bin-Level log2 Ratios/Coverages" from_work_dir="sample.cnr" />
+        <data name="out_sample_cnr" format="cnr" label="${tool.name} on ${on_string}: Bin-Level log2 Ratios/Coverages" from_work_dir="sample.cnr" />
     </outputs>
     <tests>
         <test expect_num_outputs="1">
-            <param name="input_target_file" ftype="tabular" value="tumor.targetcoverage.cnn" />
-            <param name="input_antitarget_file" ftype="tabular" value="tumor.antitargetcoverage.cnn" />
-            <param name="input_reference_coverage_file" ftype="tabular" value="ref-tas.cnn" />
+            <param name="input_target_file" ftype="cnn" value="tumor.targetcoverage.cnn" />
+            <param name="input_antitarget_file" ftype="cnn" value="tumor.antitargetcoverage.cnn" />
+            <param name="input_reference_coverage_file" ftype="cnn" value="ref-tas.cnn" />
             <param name="no_edge" value="1" />
             <output name="out_sample_cnr">
                 <assert_contents><has_text text="chromosome"/></assert_contents>
--- a/macros.xml	Mon Jan 20 16:38:49 2025 +0000
+++ b/macros.xml	Sat Mar 01 12:00:32 2025 +0000
@@ -1,10 +1,10 @@
 <macros>
-    <token name="@VERSION_SUFFIX@">1</token>
-    <token name="@TOOL_VERSION@">0.9.11</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@TOOL_VERSION@">0.9.12</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">cnvkit</requirement>
-            <requirement type="package" version="1.4.2">scikit-learn</requirement>
+            <requirement type="package" version="1.21">samtools</requirement>
         </requirements>
     </xml>
     <xml name="reference_interface">
@@ -56,13 +56,13 @@
                 <option value="wgs">whole genome sequencing </option>
             </param>
             <param argument="--segment-method" type="select" label="Method used in the 'segment' step" help="">
-                <option value="cbs" selected="True">Circular Binary Segmentation CBS</option>
-                <option value="flasso">Fused lasso, hybrid flasso</option>
-                <option value="haar">a pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>
-                <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>
-                <option value="hmm">experimental – a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two thods., hybrid hmm</option>
-                <option value="hmm-tumor">experimental – a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>
-                <option value="hmm-germline">experimental – a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>
+                <option value="cbs" selected="True">CBS: Circular Binary Segmentation (default, precise)</option>
+                <option value="flasso">Flasso: Fused Lasso; smoother segments, fewer breakpoints</option>
+                <option value="haar">Haar: Haar wavelet transform; detects abrupt changes</option>
+                <option value="none">None: No segmentation; calculates the weighted mean log2 value of each chromosome arm</option>
+                <option value="hmm">HMM: Basic Hidden Markov Model (generic use)</option>
+                <option value="hmm-tumor">HMM-tumor: HMM tailored for tumor samples (somatic CNVs)</option>
+                <option value="hmm-germline">HMM-germline: HMM for germline (inherited) variants (diploid assumption)</option>
             </param>
             <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="Use or assume a male reference" help="female samples will have +1 log-CNR of chrX; otherwise male samples would have -1 chrX" />
             <param argument="--countreads" type="boolean" checked="false" truevalue="--countreads" falsevalue="" label="Get read depths by counting read midpoints within each bin" help="" />
@@ -70,7 +70,7 @@
     </xml>
     <xml name="create_CNV_reference_file">
         <param name="input_sample_file" type="data" format="bam" label="Sample BAM file" help="" />
-        <param argument="--normal" type="data" format="bam" label="Control BAM file" help="" />
+        <param argument="--normal" optional="true" type="data" format="bam" label="Control BAM file" help="Optional only if you have a single sample or are working with WGS data with no normal samples" />
         <param argument="--targets" type="data" format="bed" label="Capture BED regions" help="" />
     </xml>
     <xml name="advanced_no_reference">
@@ -85,7 +85,7 @@
     </xml>
     <xml name="reuse_an_existing_cnv_reference_file">
         <param name="input_sample_file" type="data" format="bam" label="Sample file" help="" />
-        <param argument="--reference" type="data" format="tabular" label="CNV reference CNN File" help="" />
+        <param argument="--reference" type="data" format="cnn" label="CNV reference CNN File" help="" />
     </xml>
     <xml name="output_section">
         <section name="output_section" title="Outputs" expanded="false">
@@ -137,7 +137,7 @@
                 <option value="biweight">biweight</option>
             </param>
             <param argument="--center-at" optional="true" type="float" label="Subtract a constant number from all log2 ratios" value="" help="For manual re-centering, in case the --center option gives unsatisfactory results" />
-            <param argument="--thresholds" optional="true" type="text" label="Hard thresholds for calling each integer copy number, separated by commas" value="=-1.1,-0.25,0.2,0.7" help="Apply cutoffs to either original or rescaled log2 values" />
+            <param argument="--thresholds" type="text" label="Hard thresholds for calling each integer copy number, separated by commas" value="=-1.1,-0.25,0.2,0.7" help="Apply cutoffs to either original or rescaled log2 values" />
             <param argument="--ploidy" optional="true" type="integer" label="Ploidy of the sample cells" min="1" max="2" value="2" help="" />
             <param argument="--purity" optional="true" type="float" label="Estimated tumor cell fraction, a.k.a. purity or cellularity" min="0" max="1" value="" help="" />
             <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples" />
@@ -145,32 +145,32 @@
     </xml>
     <xml name="additionally_SNP_process">
         <param argument="--vcf" optional="true" type="data" format="vcf" label="VCF file" help="VCF file name containing variants for calculation of b-allele frequencies" />
-        <param argument="--sample-id" optional="true" type="text" label="Name of the sample in the VCF to use for b-allele frequency extraction" value="" help="" />
-        <param argument="--normal-id" optional="true" type="text" label="Corresponding normal sample ID in the input VCF" value="" help="This sample is used to select only germline SNVs to calculate b-allele frequencies" />
+        <param argument="--sample-id" type="text" label="Name of the sample in the VCF to use for b-allele frequency extraction" help="" />
+        <param argument="--normal-id" type="text" label="Corresponding normal sample ID in the input VCF" help="This sample is used to select only germline SNVs to calculate b-allele frequencies" />
         <param argument="--min-variant-depth" type="integer" min="1" value="20" optional="true" label="Minimum read depth for a SNV to be used in the b-allele frequency calculation" help="" />
         <param argument="--zygosity-freq" type="float" min="0" value="0.25" optional="true" label="Ignore VCF's genotypes and instead infer zygosity from allele frequencies" help="" />
     </xml>
     <xml name="diagram_optional">
-            <param argument="--segment" optional="true" type="data" format="tabular" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
+            <param argument="--segment" optional="true" type="data" format="cns,cnr" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
             <param argument="--threshold" optional="true" type="float" label="Threshold" min="0" value="0.5" help="Copy number change threshold to label genes" />
             <param argument="--min-probes" optional="true" type="integer" label="Minimum propes" min="1" value="3" help="Minimum number of covered probes to label a gene" />
             <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="MALE REFERENCE" help="Assume inputs were normalized to a male reference" />
             <param argument="--no-shift-xy" type="boolean" checked="false" truevalue="--no-shift-xy" falsevalue="" label="Don't adjust the X and Y chromosomes according to sample sex" help="" />
-            <param argument="--chromosome" optional="true" type="text" label="Chromosome to display" value="" help="e.g. 'chr1' no chromosomal range allowed" />
+            <param argument="--chromosome" type="text" label="Chromosome to display" help="e.g. 'chr1' no chromosomal range allowed" />
     </xml>
     <xml name="diagram_plot">
-        <param argument="--title" optional="true" type="text" label="Plot title" value="" help="" />
+        <param argument="--title" type="text" label="Plot title" help="" />
         <param argument="--no-gene-labels" type="boolean" checked="false" truevalue="--no-gene-labels" falsevalue="" label="Disable gene_name labels on plot useful when a lot of CNV were called" help="" />
     </xml>
     <xml name="heatmap_optional">
             <param argument="--by-bin" type="boolean" checked="false" truevalue="--by-bin" falsevalue="" label="Plot data x-coordinates by bin indices instead of genomic coordinates" help="" />
-            <param argument="--chromosome" optional="true" type="text" label="Chromosome range" value="" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
+            <param argument="--chromosome" type="text" label="Chromosome range" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
             <param argument="--desaturate" type="boolean" checked="false" truevalue="--desaturate" falsevalue="" label="Tweak color saturation to focus on significant changes" help="" />
             <param argument="--male-reference" type="boolean" checked="false" truevalue="--male-reference" falsevalue="" label="MALE REFERENCE" help="Assume inputs were normalized to a male reference" />
             <param argument="--no-shift-xy" type="boolean" checked="false" truevalue="--no-shift-xy" falsevalue="" label="Don't adjust the X and Y chromosomes according to sample sex" help="" />
             <param argument="--vertical" type="boolean" checked="false" truevalue="--vertical" falsevalue="" label="Plot heatmap with samples as X-axis instead of Y-axis" help="" />
             <param argument="--delimit-samples" type="boolean" checked="false" truevalue="--delimit-samples" falsevalue="" label="Add an horizontal delimitation line between each sample" help="" />
-            <param argument="--title" optional="true" type="text" label="Plot title" value="" help="" />
+            <param argument="--title" type="text" label="Plot title" help="" />
     </xml>
     <xml name="reference_optional">
             <param argument="--cluster" type="boolean" checked="false" truevalue="--cluster" falsevalue="" label="Calculate and store summary stats for clustered subsets of the normal samples with similar coverage profiles" help="" />
@@ -187,32 +187,32 @@
         <param argument="--no-rmask" type="boolean" checked="false" truevalue="--no-rmask" falsevalue="" label="skip repeat master correction" help="" />
     </xml>
     <xml name="scatter_optional">
-            <param argument="--segment" optional="true" type="data" format="tabular" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
-            <param argument="--chromosome" optional="true" type="text" label="Chromosome range" value="" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
-            <param argument="--gene" optional="true" type="text" label="Name of gene or genes comma-separated to display" value="" help="" />
+            <param argument="--segment" optional="true" type="data" format="cns,cnr" label="Segment" help="Segmentation calls cns, the output of the 'segment' command" />
+            <param argument="--chromosome" type="text" label="Chromosome range" help="Chromosome or chromosomal range, e.g. 'chr1' or 'chr1:2333000-2444000'" />
+            <param argument="--gene" type="text" label="Name of gene or genes comma-separated to display" help="" />
             <param argument="--range-list" optional="true" type="data" format="bed" label="Range list" help="File listing the chromosomal ranges to display, as BED"/>
             <param argument="--width" optional="true" type="integer" label="Width" min="1" value="1000000" help="Width of margin to show around the selected genes or small chromosomal region" />
     </xml>
     <xml name="scatter_plot">
-        <param argument="--antitarget-marker" optional="true" type="text" label="Antitarget marker" value="same as targets" help="Plot antitargets using this symbol when plotting in a selected chromosomal region"/>
+        <param argument="--antitarget-marker" type="text" label="Antitarget marker" value="same as targets" help="Plot antitargets using this symbol when plotting in a selected chromosomal region"/>
         <param argument="--by-bin" type="boolean" checked="false" truevalue="--by-bin" falsevalue="" label="Plot data x-coordinates by bin indices instead of genomic coordinates" help=""/>
-        <param argument="--segment-color" optional="true" type="text" label="Segment color" value="red" help=""/>
-        <param argument="--title" optional="true" type="text" label="Plot title" value="" help=""/>
+        <param argument="--segment-color" type="text" label="Segment color" value="red" help=""/>
+        <param argument="--title" type="text" label="Plot title" help=""/>
         <param argument="--trend" type="boolean" checked="false" truevalue="--trend" falsevalue="" label="Draw a smoothed local trendline on the scatter plot" help=""/>
         <param argument="--y-max" optional="true" type="integer" label="y-axis upper limit" min="1" value="" help=""/>
         <param argument="--y-min" optional="true" type="integer" label="y-axis lower limit" min="1" value="" help=""/>
         <param argument="--fig-size" optional="true" type="float" label="Width and height of the plot in inches" value="" help="Example 6.4 4.8, the space between the two inputs is important"/>
     </xml>
     <xml name="segment_optional">
-            <param argument="--dataframe" type="text" optional="true" label="Data frame" value="" help="File name to save the raw R dataframe emitted by CBS or Fused Lasso, example dataframe.r"/>
+            <param argument="--dataframe" type="text" label="Data frame" help="File name to save the raw R dataframe emitted by CBS or Fused Lasso, example dataframe.r"/>
             <param argument="--method" type="select" label="Segmentation method" help="">
-                <option value="cbs" selected="True">Circular Binary Segmentation CBS method,hybrid CBS</option>
-                <option value="flasso">Fused lasso, hybrid flasso</option>
-                <option value="haar">A pure-Python implementation of HaarSeg, a wavelet-based method. Very fast and performs reasonably well on small panels, but tends to over-segment large datasets., hybrid haar</option>
-                <option value="none">simply calculate the weighted mean log2 value of each chromosome arm. Useful for testing or debugging, or as a baseline for benchmarking other methods., hybrid none</option>
-                <option value="hmm">experimental – a 3-state Hidden Markov Model suitable for most samples. Faster than CBS, and slower but more accurate than Haar. Requires the Python package pomegranate, as do the next two methods., hybrid hmm</option>
-                <option value="hmm-tumor">experimental – a 5-state HMM suitable for finer-grained segmentation of good-quality tumor samples. In particular, this method can detect focal amplifications within a larger-scale, smaller-amplitude copy number gain, or focal deep deletions within a larger-scale hemizygous loss. Training this model takes a bit more CPU time than the simpler hmm method., hybrid hmm-tumor</option>
-                <option value="hmm-germline">experimental – a 3-state HMM with fixed amplitude for the loss, neutral, and gain states corresponding to absolute copy numbers of 1, 2, and 3. Suitable for germline samples and single-cell sequencing of samples with mostly-diploid genomes that are not overly aneuploid., hybrid hmm-germline</option>
+                <option value="cbs" selected="True">CBS: Circular Binary Segmentation (default, precise)</option>
+                <option value="flasso">Flasso: Fused Lasso; smoother segments, fewer breakpoints</option>
+                <option value="haar">Haar: Haar wavelet transform; detects abrupt changes</option>
+                <option value="none">None: No segmentation; calculates the weighted mean log2 value of each chromosome arm</option>
+                <option value="hmm">HMM: Basic Hidden Markov Model (generic use)</option>
+                <option value="hmm-tumor">HMM-tumor: HMM tailored for tumor samples (somatic CNVs)</option>
+                <option value="hmm-germline">HMM-germline: HMM for germline (inherited) variants (diploid assumption)</option>
             </param>
             <param argument="--threshold" optional="true" type="integer" label="Significance threshold" min="1" help="To accept breakpoints during segmentation. For HMM methods, this is the smoothing window size"/>
             <param argument="--drop-low-coverage" type="boolean" checked="false" truevalue="--drop-low-coverage" falsevalue="" label="Drop very-low-coverage bins before segmentation" help="To avoid false-positive deletions in poor-quality tumor samples"/>
--- a/test-data/sample.cnv.vcf	Mon Jan 20 16:38:49 2025 +0000
+++ b/test-data/sample.cnv.vcf	Sat Mar 01 12:00:32 2025 +0000
@@ -1,6 +1,6 @@
 ##fileformat=VCFv4.2
-##fileDate=20250120
-##source=CNVkit v0.9.11
+##fileDate=20250203
+##source=CNVkit v0.9.12
 ##INFO=<ID=CIEND,Number=2,Type=Integer,Description="Confidence interval around END for imprecise variants">
 ##INFO=<ID=CIPOS,Number=2,Type=Integer,Description="Confidence interval around POS for imprecise variants">
 ##INFO=<ID=END,Number=1,Type=Integer,Description="End position of the variant described in this record">