changeset 0:febc6023d37b draft

Uploaded
author geert-vandeweyer
date Fri, 25 Sep 2020 08:29:36 +0000
parents
children 5c324f9a4e20
files varamplicnv-5bafb1c69d03/README.rst varamplicnv-5bafb1c69d03/VarAmpliCNV_Anno.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_CallCNVs.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_Count.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_GC.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_MergeCounts.xml varamplicnv-5bafb1c69d03/tool-data/TwoBit.loc.sample varamplicnv-5bafb1c69d03/tool_data_table_conf.xml.sample
diffstat 8 files changed, 303 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/README.rst	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,4 @@
+varAmpliCNV
+===========
+
+Wrappers for the varAmpliCNV package for HaloPlex CNV calling
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_Anno.xml	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,51 @@
+<tool id="VarAmpliCNV_Anno" name="Annotate Amplicons" version="0.1.0" python_template_version="3.5">
+    <requirements>
+	<container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+	processAmpliconRegion.py
+
+	## amplicon design file (BED)
+	-a '$AmpInput'
+
+	## region-of-interest file (BED)
+	-r '$roiInput'
+
+	## optional ignore list; the flag is only emitted when a dataset was supplied
+	#if $snpInput
+		-s '$snpInput'
+	#end if
+
+	## output files: de-duplicated amplicons (-D) and ROI-annotated amplicons (-R)
+	-D '$dedupOut'
+	-R '$roiOut'
+
+
+	]]></command>
+    <inputs>
+	<param name="AmpInput" type="data" format="bed" optional="false" label="Amplicon Design File (BED)" help="This file is the HaloPlex design file, containing individual amplicons." />
+	<param name="roiInput" type="data" format="bed" optional="false" label="ROI file (BED)" help="Region of Interest definition. Typically one line per Exon of the target genes bis." />
+	<param name="snpInput" type="data" format="bed" optional="true" label="Ignore List (BED)" help="A subset of amplicons you do not want to analyze (e.g. a co-enriched panel)." />
+    </inputs>
+    <outputs>
+	<data name="dedupOut" format="bed" label="VarAmpliCNV on ${on_string}: DeDuplicated Amplicon List" />
+	<data name="roiOut" format="bed" label="VarAmpliCNV on ${on_string}: ROI-Annotated Amplicon List" />
+    </outputs>
+<help>
+**VarAmpliCNV : BED file PreProcessing**
+
+Preprocessing includes removing SNP (unwanted amplicons) coordinates and duplicate coordinates. It also adds Region annotations to input region of interest (ROI) used during plotting.
+
+**Parameters are :** 
+
+* Amplicon Design File (BED) : This is the *exact* BED file provided by HaloPlex, containing the restriction fragments.
+* ROI file (BED) : Typically the file provided *to* HaloPlex as the basis of the design. Names in column 4 are used to annotate CNV plots
+* Ignore list (BED) : (optional) Provide amplicons present in amplicon design, to exclude during the analysis
+
+**Output files :**
+
+* DeDuplicated amplicon List (BED) : Use this file in subsequent steps (GC and Counting).
+* ROI-Annotated List : This file is needed during CNV-calling.
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_CallCNVs.xml	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,96 @@
+<tool id="VarAmpliCNV_CallCNVs" name="Call CNVs" version="0.1.0" >
+    <requirements>
+	<container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+     <command detect_errors="exit_code"><![CDATA[
+	## make output folder (the output's discover_datasets rule reads from it).
+	mkdir -p Output
+
+	&&
+
+	varAmpliCNV.R
+
+	## counts object
+	-i '$RData'
+
+	## deduped amplicons.
+	-b '$BedFile'
+
+	## Gene information
+	-r '$RoiFile'
+
+	## GC content
+	-c '$GcFile'
+
+	## genders (the flag is only emitted when a dataset was supplied)
+	#if $GenderFile
+		-s '$GenderFile'
+	#end if
+
+	## output folder
+	-o Output
+
+	## variance to remove
+	-p $VarProp
+
+	## analysis type
+	-n $aTypeSelect.aType
+
+	## thresholds (defaults depend on the selected analysis type)
+	-d $aTypeSelect.delT
+	-D $aTypeSelect.dupT
+
+	]]></command>
+    <inputs>
+	<param name="RData" type="data" format="rdata" optional="false" label="Sample Amplicon Counts" help="This file is created by the VarAmpliCNV 'Merge Counts' tool." />
+	<param name="BedFile" type="data" format="bed" optional="false" label="Amplicon BED file (duplicates removed)" help="This file is created by the VarAmpliCNV 'annotate' tool." />
+	<param name="RoiFile" type="data" format="bed" optional="false" label="ROI-Amplicon file" help="This file is created by the VarAmpliCNV 'annotate' tool." />
+	<param name="GcFile" type="data" format="txt" optional="false" label="Amplicon GC content" help="This file is created by the VarAmpliCNV 'Get GC Content' tool."/>
+	<param name="GenderFile" type="data" format="txt" optional="true" label="Sample Genders" help="Sample genders for handling X-CNVs. See Documentation for format information"/>
+	<param name="VarProp" type="float" value="0.8" label="Fraction of Variance to remove during MDS" help="Default: 0.8" />	
+	<conditional name="aTypeSelect">
+		<param name="aType" type="select" label="Analysis Type" help="Use 'Direct' Segmentation (pure CBS), or apply post-segmentation 'AOF' (Amplicon Overlap Filtering)">
+			<option value="0">Direct Segmentation</option>
+			<option value="1" selected="TRUE">Amplicon Overlap Filtering</option>
+		</param>
+		<when value="0">
+			<param name="delT" type="float" value="-0.5" label="LogR Threshold for Deletions"/>
+			<param name="dupT" type="float" value="0.5" label="LogR Threshold for Duplications"/>
+		</when>
+		<when value="1">
+			<param name="delT" type="float" value="-0.2" label="LogR Threshold for Deletions"/>
+			<param name="dupT" type="float" value="0.38" label="LogR Threshold for Duplications"/>
+		</when>
+	</conditional>
+    </inputs>
+    <outputs>
+	<data name="outfile" format="pdf" label="VarAmpliCNV on ${on_string}: Results" >
+		<discover_datasets pattern="__designation_and_ext__" directory="Output" visible="true" assign_primary_output="true" />
+	</data>
+    </outputs>
+    <help>
+**VarAmpliCNV : Call CNVs**
+
+During CNV calling read counts are normalized over all samples included during "count merging", a set fraction of variance is removed and circular binary segmentation is applied to identify CNVs. If specified, a post-processing step is applied to take amplicon size and overlap into account to estimate reliability of the event. Passing CNVs are plotted gene-by-gene. 
+
+**Parameters are :** 
+
+* Sample Amplicon Counts (RData) : The result from the 'Merge Counts' tool. It contains a raw sample-by-amplicon count matrix. 
+* Amplicon BED file (BED) : This is the *exact* BED file provided by HaloPlex for the used library, with duplicates removed using the VarAmpliCNV "Annotate" tool.
+* ROI file (BED) : This is the *exact* BED file provided by HaloPlex for the used library, with duplicates removed and annotated with gene symbols using the VarAmpliCNV "Annotate" tool.
+* Amplicon GC Content (txt) : GC-content of individual amplicons, used for count correction. Generated using VarAmpliCNV 'Amplicon GC-Content' tool. 
+* Sample Genders (txt) : Optional. If specified, build gender-specific normalization sets for X and Y chromosomes. Format is tab-separated : SampleName&lt;tab&gt;M/F/U
+* Fraction of Variance to Remove : Using an approach similar to Principal component analysis, a preset fraction of noise is removed from the data. Higher values typically result in fewer CNVs. 
+* Analysis Type : Direct Segmentation applies only CBS and will not plot results. Amplicon Overlap Filtering is a post-processing filter to improve specificity, and will also enable plotting. The full CBS-results are always returned for manual inspection. 
+* Thresholds : Set minimal values for LogR-based filtering of the called Segments. 
+
+
+**Output files :**
+
+* Parameter_settings : Overview of set and derived settings + a list of discarded samples
+* Plots.Quality_Measures : Quality metrics: coverage, variance by PC, GC-coverage-correlation
+* Plots.Results : Gene-based CNV plots for segments passing the filters (if AOF is activated)
+* Table.Results.Full : Full CBS results
+* Table.Results.Filtered : Filtered CBS results (on LogR).  
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_Count.xml	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,54 @@
+<tool id="VarAmpliCNV_Count" name="Count Aligned Amplicons" version="0.1.0" >
+    <requirements>
+	<container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link the input under a fixed name and link (or build) its index next to it
+        ln -s '$bam' infile &&
+        #if $bam.is_of_type('bam'):
+            #if str( $bam.metadata.bam_index ) != "None":
+                ln -s '${bam.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $bam.is_of_type('cram'):
+            #if str( $bam.metadata.cram_index ) != "None":
+                ln -s '${bam.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+
+	## Run Counter. NOTE(review): the 'bam' input only declares format="bam", so the cram branch above looks unreachable - confirm.
+	parseBAM.py '$bed' 'infile' '$counts' '$unmapped' '$stats' 
+
+	]]></command>
+    <inputs>
+	<param name="bed" type="data" format="bed" optional="false" label="Unique Amplicons (BED)" help="This file is created by the VarAmpliCNV 'Annotate Amplicons' tool." />
+	<param name="bam" type="data" format="bam" optional="false" label="BAM file" help="Sequencing data file" />
+    </inputs>
+    <outputs>
+	<data name="counts" format="tabular" label="VarAmpliCNV on ${on_string}: Counts" />
+	<data name="unmapped" format="bam" label="VarAmpliCNV on ${on_string}: UnMapped Reads" />
+	<data name="stats" format="txt" label="VarAmpliCNV on ${on_string}: Statistics" />
+    </outputs>
+    <help>
+**VarAmpliCNV : Counting**
+
+BAM files are parsed for readpairs exactly matching specified amplicons, based on start and end position. 
+
+**Parameters are :** 
+
+* Amplicon Design File (BED) : The de-duplicated amplicon list, generated by "varAmpliCNV Annotate".
+* Sample Data (BAM) : The sample read data, provided as a single BAM file, or a collection of BAM files. 
+
+**Output files :**
+
+* Count file (txt): the amplicon-read table. 
+* Unmapped (BAM)  : Reads not matching amplicons. This bam file can be used to investigate issues.
+* Stats (txt): Overview of the matching performance. Use it to investigate issues.
+
+In case a collection of BAM files is provided, the output files will be grouped in collections as well. 
+
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_GC.xml	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,43 @@
+<tool id="VarAmpliCNV_GC" name="Get Amplicon GC-Content" version="0.1.0" python_template_version="2.7">
+    <requirements>
+	<container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+	getGCAmplicon.py
+
+	## amplicon input file
+	-a '$input'
+
+	## output file
+	-o '$gcOut'
+
+	## 2bit reference genome file, taken from the selected TwoBitDocker data table entry
+	-f '$index'
+
+	]]></command>
+    <inputs>
+	<param name="input" type="data" format="bed" optional="false" label="Amplicon Design File (BED)" help="This file is the HaloPlex design file, containing individual amplicons." />
+	<param name="index" type="select" label="Genome Build" help="Genome build to extract GC content from. Options are read from the TwoBitDocker data table.">
+		<options from_data_table="TwoBitDocker" />
+	</param>
+    </inputs>
+    <outputs>
+	<data name="gcOut" format="tabular" label="VarAmpliCNV on ${on_string}: GC-content" />
+	<!--<data name="ampliconOut" format="tabular" label="Amplicon Gene annotations" />-->
+    </outputs>
+    <help>
+**VarAmpliCNV : GC-calculation**
+
+Calculate the GC-content of entries in a BED file. 
+
+**Parameters are :** 
+
+* Amplicon Design File (BED) : The de-duplicated amplicon list, generated by "varAmpliCNV Annotate".
+* Genome Build : Select a genome build from the configured options to extract GC content from. 
+
+**Output files :**
+
+* GC-content (txt) : Tabular file containing GC information per region. 
+
+</help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_MergeCounts.xml	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,39 @@
+<tool id="VarAmpliCNV_MergeCounts" name="Merge Counts" version="0.1.0" >
+    <requirements>
+	<container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+ 
+   <command detect_errors="exit_code"><![CDATA[
+	mkdir -p infiles
+
+	## link every count file into 'infiles' under its own basename plus '.txt'
+	#for $element in $inputFile:
+	   && bname=\$(basename '${element}')
+	   && ln -s '${element}' "infiles/\$bname.txt"
+	#end for
+
+	&&
+
+	MergeCounts.py -d 'infiles' -o '$outfile'
+	]]></command>
+    <inputs>
+	<param name="inputFile" type="data" format="txt" optional="false" multiple="True" label="Sample Amplicon Counts" help="This file is created by the VarAmpliCNV 'Count' tool." />
+    </inputs>
+    <outputs>
+	<data name="outfile" format="rdata" label="VarAmpliCNV on ${on_string}: Aggregated Counts" />
+    </outputs>
+    <help>
+**VarAmpliCNV : Merge Count files**
+
+Merge a list of count files from "varAmpliCNV Count" into a single cohort for CNV-calling.
+
+**Parameters are :** 
+
+* Count file (txt) : A list or collection of count files from "varAmpliCNV Count".
+
+
+**Output files :**
+
+* Count-by-Sample Matrix (Rdata) : Rdata object containing counts for all provided samples.   
+    </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/tool-data/TwoBit.loc.sample	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,9 @@
+# This file lists 2bit indices used for GC-computation in VarAmpliCNV
+
+# white space is TAB !
+
+# make sure to add the path to docker_volumes ! 
+
+#<unique_build_id> <dbkey> <display_name> <file_path>
+#
+#hg19	hg19	Human: hg19/GRCh37	/opt/NGS/References/hg19/2bit/hg19.2bit
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/tool_data_table_conf.xml.sample	Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of 2bit files made available under docker paths -->
+    <table name="TwoBitDocker" comment_char="#">
+        <columns>short, dbkey, name, value</columns>
+        <file path="tool-data/TwoBit.loc" />
+    </table>
+</tables>