Mercurial > repos > geert-vandeweyer > varamplicnv
changeset 0:febc6023d37b draft
Uploaded
author | geert-vandeweyer |
---|---|
date | Fri, 25 Sep 2020 08:29:36 +0000 |
parents | |
children | 5c324f9a4e20 |
files | varamplicnv-5bafb1c69d03/README.rst varamplicnv-5bafb1c69d03/VarAmpliCNV_Anno.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_CallCNVs.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_Count.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_GC.xml varamplicnv-5bafb1c69d03/VarAmpliCNV_MergeCounts.xml varamplicnv-5bafb1c69d03/tool-data/TwoBit.loc.sample varamplicnv-5bafb1c69d03/tool_data_table_conf.xml.sample |
diffstat | 8 files changed, 303 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/varamplicnv-5bafb1c69d03/README.rst Fri Sep 25 08:29:36 2020 +0000 @@ -0,0 +1,4 @@ +varAmpliCNV +=========== + +Wrappers for the varAmpliCNV package for HaloPlex CNV calling
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_Anno.xml Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,51 @@
+<tool id="VarAmpliCNV_Anno" name="Annotate Amplicons" version="0.1.0" python_template_version="3.5">
+    <requirements>
+        <container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        processAmpliconRegion.py
+
+        ## Amplicon design file (BED), passed with -a
+        -a '$AmpInput'
+
+        ## Region-of-interest file (BED), passed with -r
+        -r '$roiInput'
+
+        ## Optional ignore list: -s is only emitted when a dataset was selected; quoted like every other path
+        #if $snpInput
+            -s '$snpInput'
+        #end if
+
+        ## Output datasets: -D receives the de-duplicated list, -R the ROI-annotated list
+        -D '$dedupOut'
+        -R '$roiOut'
+
+
+    ]]></command>
+    <inputs>
+        <param name="AmpInput" type="data" format="bed" optional="false" label="Amplicon Design File (BED)" help="This file is the HaloPlex design file, containing individual amplicons." />
+        <param name="roiInput" type="data" format="bed" optional="false" label="ROI file (BED)" help="Region of Interest definition. Typically one line per exon of the target genes." />
+        <param name="snpInput" type="data" format="bed" optional="true" label="Ignore List (BED)" help="A subset of amplicons you do not want to analyze (e.g. a co-enriched panel)." />
+    </inputs>
+    <outputs>
+        <data name="dedupOut" format="bed" label="VarAmpliCNV on ${on_string}: DeDuplicated Amplicon List" />
+        <data name="roiOut" format="bed" label="VarAmpliCNV on ${on_string}: ROI-Annotated Amplicon List" />
+    </outputs>
+<help>
+**VarAmpliCNV : BED file PreProcessing**
+
+Preprocessing includes removing SNP (unwanted amplicons) coordinates and duplicate coordinates. It also adds Region annotations to input region of interest (ROI) used during plotting.
+
+**Parameters are :**
+
+* Amplicon Design File (BED) : This is the *exact* BED file provided by HaloPlex, containing the restriction fragments.
+* ROI file (BED) : Typically the file provided *to* HaloPlex as the basis of the design. Names in column 4 are used to annotate CNV plots.
+* Ignore list (BED) : (optional) Provide amplicons present in amplicon design, to exclude during the analysis
+
+**Output files :**
+
+* DeDuplicated amplicon List (BED) : Use this file in subsequent steps (GC and Counting).
+* ROI-Annotated List : This file is needed during CNV-calling.
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_CallCNVs.xml Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,96 @@
+<tool id="VarAmpliCNV_CallCNVs" name="Call CNVs" version="0.1.0" >
+    <requirements>
+        <container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Create the folder that discover_datasets scans; -p keeps this from failing if it already exists
+        mkdir -p Output
+
+        &&
+
+        varAmpliCNV.R
+
+        ## Merged counts object (RData input)
+        -i '$RData'
+
+        ## De-duplicated amplicon BED file
+        -b '$BedFile'
+
+        ## ROI-annotated amplicon file (gene information)
+        -r '$RoiFile'
+
+        ## Per-amplicon GC content
+        -c '$GcFile'
+
+        ## Sample genders: -s is only emitted when a dataset was selected
+        #if $GenderFile
+            -s '$GenderFile'
+        #end if
+
+        ## Output folder; must match the directory attribute of discover_datasets below
+        -o Output
+
+        ## Fraction of variance to remove
+        -p $VarProp
+
+        ## Analysis type: the option value (0 or 1) of the aType select
+        -n $aTypeSelect.aType
+
+        ## LogR thresholds: -d for deletions, -D for duplications
+        -d $aTypeSelect.delT
+        -D $aTypeSelect.dupT
+
+    ]]></command>
+    <inputs>
+        <param name="RData" type="data" format="rdata" optional="false" label="Sample Amplicon Counts" help="This file is created by the VarAmpliCNV 'Merge Counts' tool." />
+        <param name="BedFile" type="data" format="bed" optional="false" label="Amplicon BED file (duplicates removed)" help="This file is created by the VarAmpliCNV 'annotate' tool." />
+        <param name="RoiFile" type="data" format="bed" optional="false" label="ROI-Amplicon file" help="This file is created by the VarAmpliCNV 'annotate' tool." />
+        <param name="GcFile" type="data" format="txt" optional="false" label="Amplicon GC content" help="This file is created by the VarAmpliCNV 'Get GC Content' tool."/>
+        <param name="GenderFile" type="data" format="txt" optional="true" label="Sample Genders" help="Sample genders for handling X-CNVs. See Documentation for format information"/>
+        <param name="VarProp" type="float" value="0.8" label="Fraction of Variance to remove during MDS" help="Default: 0.8" />
+        <conditional name="aTypeSelect">
+            <param name="aType" type="select" label="Analysis Type" help="Use 'Direct' Segmentation (pure CBS), or apply post-segmentation 'AOF' (Amplicon Overlap Filtering)">
+                <option value="0">Direct Segmentation</option>
+                <option value="1" selected="true">Amplicon Overlap Filtering</option>
+            </param>
+            <when value="0">
+                <param name="delT" value="-0.5" label="LogR Threshold for Deletions" type="float"/>
+                <param name="dupT" value="0.5" label="LogR Threshold for Duplications" type="float"/>
+            </when>
+            <when value="1">
+                <param name="delT" value="-0.2" label="LogR Threshold for Deletions" type="float"/>
+                <param name="dupT" value="0.38" label="LogR Threshold for Duplications" type="float"/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="outfile" format="pdf" label="VarAmpliCNV on ${on_string}: Results" >
+            <discover_datasets pattern="__designation_and_ext__" directory="Output" visible="true" assign_primary_output="true" />
+        </data>
+    </outputs>
+    <help>
+**VarAmpliCNV : Call CNVs**
+
+During CNV calling read counts are normalized over all samples included during "count merging", a set fraction of variance is removed and circular binary segmentation is applied to identify CNVs. If specified, a post-processing step is applied to take amplicon size and overlap into account to estimate reliability of the event. Passing CNVs are plotted gene-by-gene.
+
+**Parameters are :**
+
+* Sample Amplicon Counts (RData) : The result from the 'Merge Counts' tool. It contains a raw sample-by-amplicon count matrix.
+* Amplicon BED file (BED) : This is the *exact* BED file provided by HaloPlex for the used library, with duplicates removed using the VarAmpliCNV "Annotate" tool.
+* ROI file (BED) : This is the *exact* BED file provided by HaloPlex for the used library, with duplicates removed and annotated with gene symbols using the VarAmpliCNV "Annotate" tool.
+* Amplicon GC Content (txt) : GC-content of individual amplicons, used for count correction. Generated using VarAmpliCNV 'Amplicon GC-Content' tool.
+* Sample Genders (txt) : Optional. If specified, build gender-specific normalization sets for X and Y chromosomes. Format is tab-separated : SampleName&lt;tab&gt;M/F/U
+* Fraction of Variance to Remove : Using an approach similar to Principal component analysis, a preset fraction of noise is removed from the data. Higher values typically result in fewer CNVs.
+* Analysis Type : Direct Segmentation applies only CBS and will not plot results. Amplicon Overlap Filtering is a post-processing filter to improve specificity, and will also enable plotting. The full CBS-results are always returned for manual inspection.
+* Thresholds : Set minimal values for LogR-based filtering of the called Segments.
+
+
+**Output files :**
+
+* Parameter_settings : Overview of set and derived settings + a list of discarded samples
+* Plots.Quality_Measures : Quality metrics: coverage, variance by PC, GC-coverage-correlation
+* Plots.Results : Gene-based CNV plots for segments passing the filters (if AOF is activated)
+* Table.Results.Full : Full CBS results
+* Table.Results.Filtered : Filtered CBS results (on LogR).
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_Count.xml Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,54 @@
+<tool id="VarAmpliCNV_Count" name="Count Aligned Amplicons" version="0.1.0" >
+    <requirements>
+        <container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link the input as 'infile' and provide its index: reuse Galaxy's index metadata when set, else build one. NOTE(review): the cram branch is unreachable while the 'bam' param only accepts format="bam".
+        ln -s '$bam' infile &&
+        #if $bam.is_of_type('bam'):
+            #if str( $bam.metadata.bam_index ) != "None":
+                ln -s '${bam.metadata.bam_index}' infile.bai &&
+            #else:
+                samtools index infile infile.bai &&
+            #end if
+        #elif $bam.is_of_type('cram'):
+            #if str( $bam.metadata.cram_index ) != "None":
+                ln -s '${bam.metadata.cram_index}' infile.crai &&
+            #else:
+                samtools index infile infile.crai &&
+            #end if
+        #end if
+
+        ## Run the counter; positional arguments: amplicon BED, linked alignment file, then the three output datasets
+        parseBAM.py '$bed' 'infile' '$counts' '$unmapped' '$stats'
+
+    ]]></command>
+    <inputs>
+        <param name="bed" type="data" format="bed" optional="false" label="Unique Amplicons (BED)" help="This file is created by the VarAmpliCNV 'Annotate Amplicons' tool." />
+        <param name="bam" type="data" format="bam" optional="false" label="BAM file" help="Sequencing data file" />
+    </inputs>
+    <outputs>
+        <data name="counts" format="tabular" label="VarAmpliCNV on ${on_string}: Counts" />
+        <data name="unmapped" format="bam" label="VarAmpliCNV on ${on_string}: UnMapped Reads" />
+        <data name="stats" format="txt" label="VarAmpliCNV on ${on_string}: Statistics" />
+    </outputs>
+    <help>
+**VarAmpliCNV : Counting**
+
+BAM files are parsed for read pairs exactly matching specified amplicons, based on start and end position.
+
+**Parameters are :**
+
+* Amplicon Design File (BED) : The de-duplicated amplicon list, generated by "varAmpliCNV Annotate".
+* Sample Data (BAM) : The sample read data, provided as a single BAM file, or a collection of BAM files.
+
+**Output files :**
+
+* Count file (txt): the amplicon-read table.
+* Unmapped (BAM) : Reads not matching amplicons. This bam file can be used to investigate issues.
+* Stats (txt): Overview of the matching performance. Use it to investigate issues.
+
+In case a collection of BAM files is provided, the output files will be grouped in collections as well.
+
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_GC.xml Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,43 @@
+<tool id="VarAmpliCNV_GC" name="Get Amplicon GC-Content" version="0.1.0" python_template_version="2.7">
+    <requirements>
+        <container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        getGCAmplicon.py
+
+        ## Amplicon BED input
+        -a '$input'
+
+        ## GC-content output dataset
+        -o '$gcOut'
+
+        ## 2bit reference genome: value of the entry chosen from the TwoBitDocker data table, quoted like the other paths
+        -f '$index'
+
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="bed" optional="false" label="Amplicon Design File (BED)" help="This file is the HaloPlex design file, containing individual amplicons." />
+        <param name="index" type="select" label="Genome Build" help="Select a genome build from the configured 2bit references.">
+            <options from_data_table="TwoBitDocker" />
+        </param>
+    </inputs>
+    <outputs>
+        <data name="gcOut" format="tabular" label="VarAmpliCNV on ${on_string}: GC-content" />
+        <!--<data name="ampliconOut" format="tabular" label="Amplicon Gene annotations" />-->
+    </outputs>
+    <help>
+**VarAmpliCNV : GC-calculation**
+
+Calculate the GC-content of entries in a BED file.
+
+**Parameters are :**
+
+* Amplicon Design File (BED) : The de-duplicated amplicon list, generated by "varAmpliCNV Annotate".
+* Genome Build : Select a genome build from the configured options to extract GC content from.
+
+**Output files :**
+
+* GC-content (txt) : Tabular file containing GC information per region.
+
+</help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/VarAmpliCNV_MergeCounts.xml Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,39 @@
+<tool id="VarAmpliCNV_MergeCounts" name="Merge Counts" version="0.1.0" >
+    <requirements>
+        <container type="docker">cmgantwerpen/varamplicnv:1.0.0</container>
+    </requirements>
+
+    <command detect_errors="exit_code"><![CDATA[
+        mkdir -p infiles
+
+        #for $element in $inputFile:
+            && bname=\$(basename '${element}')
+            && ln -s '${element}' "infiles/\$bname.txt"
+        #end for
+
+        &&
+
+        MergeCounts.py -d 'infiles' -o '$outfile'
+
+    ]]></command>
+    <inputs>
+        <param name="inputFile" type="data" format="txt" optional="false" multiple="true" label="Sample Amplicon Counts" help="This file is created by the VarAmpliCNV 'Count' tool." />
+    </inputs>
+    <outputs>
+        <data name="outfile" format="rdata" label="VarAmpliCNV on ${on_string}: Aggregated Counts" />
+    </outputs>
+    <help>
+**VarAmpliCNV : Merge Count files**
+
+Merge a list of count files from "varAmpliCNV Count" into a single cohort for CNV-calling.
+
+**Parameters are :**
+
+* Count file (txt) : A list or collection of count files from "varAmpliCNV Count".
+
+
+**Output files :**
+
+* Count-by-Sample Matrix (Rdata) : Rdata object containing counts for all provided samples.
+    </help>
+</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/tool-data/TwoBit.loc.sample Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,9 @@
+# This file lists the 2bit indices used for GC computation in VarAmpliCNV.
+
+# Columns are separated by a single TAB character, not spaces!
+
+# Make sure to add the path to docker_volumes!
+
+#<unique_build_id>	<dbkey>	<display_name>	<file_path>
+#
+#hg19	hg19	Human: hg19/GRCh37	/opt/NGS/References/hg19/2bit/hg19.2bit
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/varamplicnv-5bafb1c69d03/tool_data_table_conf.xml.sample Fri Sep 25 08:29:36 2020 +0000
@@ -0,0 +1,7 @@
+<tables>
+  <!-- Data table of 2bit reference files, addressed by their paths inside the docker container -->
+  <table comment_char="#" name="TwoBitDocker">
+    <columns>short, dbkey, name, value</columns>
+    <file path="tool-data/TwoBit.loc"/>
+  </table>
+</tables>