changeset 3:44e4f5bfebde draft default tip

Uploaded
author fxce
date Mon, 16 Nov 2020 13:59:53 +0000
parents 472ea11c2b25
children
files my_VDM_tool.xml
diffstat 1 files changed, 328 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/my_VDM_tool.xml	Mon Nov 16 13:59:53 2020 +0000
@@ -0,0 +1,328 @@
+ <tool id="my_VDM_tool" name="VDM_tool" version="1.0.0">
+	<description>Map a mutation using the Variant Discovery Mapping approach (in silico bulk segregant linkage analysis).</description>
+	<stdio>
+		<exit_code range="1:"/>
+	</stdio>
+	<requirements>
+		<requirement type="package" version="3.2.1">R</requirement>
+		<requirement type="package" version="1.2.0">getopt</requirement>
+	</requirements>
+
+	<command>
+	Rscript "${__tool_directory__}/my_VDM_tool.R"
+		--inf "$inf"
+		#if $species.species_select=="Celegans"
+			--itype "$species.ce"
+		#else if $species.species_select=="Zebrafish"
+			--itype "$species.ze"
+		#else if $species.species_select=="Brachypodium"
+			--itype "$species.br"
+		#else if $species.species_select=="Arabidopsis"
+			--itype "$species.ar"
+		#else if $species.species_select=="other"
+			--itype "$species.ot"
+		#end if
+			
+		--qual $qual 
+		
+		#if $allfreq.allfreq_select=="AB"
+			--allr "$allfreq.ab"
+		#else if $allfreq.allfreq_select=="ratio"
+			--allr "$allfreq.ratio"
+		#end if
+		
+		#if $only_snp.only_snp_select=="TRUE"
+			--snp "$only_snp.true"
+		#else if $only_snp.only_snp_select=="FALSE"
+			--snp "$only_snp.false"
+		#end if
+
+		--freqthr "$freqthr"
+		
+		--lsp $lsp 
+		--pcol "$pcol" 
+		--lcol "$lcol" 
+		
+		#if $xaxis.xaxis_select=="TRUE"
+			--xstand $xaxis.true
+		#else if $xaxis.xaxis_select=="FALSE"
+			--xstand $xaxis.false
+		#end if
+		
+		--bsize $bsize
+
+		#if $binnorm.binnorm_select=="TRUE"
+			--bnorm $binnorm.true
+		#else if $binnorm.binnorm_select=="FALSE"
+			--bnorm $binnorm.false
+		#end if
+
+		#if $exclfiles.exclfiles_select=="FALSE"
+			--exclf $exclfiles.false
+		#else if $exclfiles.exclfiles_select=="TRUE"
+			--exclf $exclfiles.true
+		#end if
+		
+		--exclcol "$exclcol" 
+
+		--outn "$outn"
+		--pdfn "$pdfn"
+	</command>
+
+	<inputs>
+		<param type="data" name="inf" format="vcf" label="fastq file"/>
+
+		<conditional name="species">
+			<param name="species_select" type="select" label="Select the species">
+				<option value="Celegans">C. elegans</option>
+				<option value="Zebrafish">Zebrafish</option>
+				<option value="Brachypodium">Brachypodium</option>
+				<option value="Arabidopsis">Arabidopsis</option>
+				<option value="other">other</option>
+			</param>
+			<when value="Celegans">
+				<param name="ce" type="hidden" value="C.elegans" label="The C. elegans chromosome numbers and lengths (in Mb)" help=""/>
+			</when>
+			<when value="Zebrafish">
+				<param name="ze" type="hidden" value="Zebrafish" label="The Zebrafish chromosome numbers and lengths (in Mb)" help=""/>
+			</when>
+			<when value="Brachypodium">
+				<param name="br" type="hidden" value="Brachypodium" label="The Brachypodium chromosome numbers and lengths (in Mb)" help=""/>
+			</when>
+			<when value="Arabidopsis">
+				<param name="ar" type="hidden" value="Arabidopsis" label="The Arabidopsis chromosome numbers and lengths (in Mb)" help=""/>
+			</when>
+			<when value="other">
+				<param name="ot" type="data" format="tabular" label="Select file with chromosome numbers and lengths (in Mb) from your history" help="Table consisting of chromosome number in column 1 and length (in Mb) in column 2 (e.g. 'CHRI	16' or 'CHR1	16') with no column header names, tab-delimitation, and no quotation marks in a .txt file"/>
+			</when>
+		</conditional>
+			
+		<param type="float" name="qual" value="200" label="Filter by quality" help=""/>
+
+		<conditional name="allfreq">
+			<param name="allfreq_select" type="select" label="Select how allele ratios is calculated">
+				<option value="AB">AB</option>
+				<option value="ratio">AO/(AO+RO)</option>
+			</param>
+			<when value="AB">
+				<param name="ab" type="hidden" value="AB" label="Use AB field from Freebayes" help="Use AB field (from Freebayes) as the value for allele frequency"/>
+			</when>
+			<when value="ratio">
+				<param name="ratio" type="hidden" value="ratio" label="Use AO/(AO+RO) calculation from Freebayes" help="Use AO/(AO+RO) calculation (from Freebayes) as the value for allele frequency"/>
+			</when>
+			</conditional>
+
+		<conditional name="only_snp">
+			<param name="only_snp_select" type="select" label="Select type of variants to use for plotting">
+				<option value="TRUE">only SNP variants</option>
+				<option value="FALSE">all variant types</option>
+			</param>
+			<when value="TRUE">
+				<param name="true" type="hidden" value="TRUE" label="Use only SNP variants" help="Use only SNP variants"/>
+			</when>
+			<when value="FALSE">
+				<param name="false" type="hidden" value="FALSE" label="Use all types of variants" help="Use all types of variants"/>
+			</when>
+			</conditional>
+			
+		<param type="float" name="lsp" value="0.4" label="Loess span" help="Parameter that controls the smoothing of the Loess curve"/>
+		<param type="text" name="pcol" value="black" label="Colour of scatterplot points" help="See below for list of supported colors"/>
+		<param type="text" name="lcol" value="red" label="Colour of Loess curve" help="See below for list of supported colors"/>
+
+
+		<conditional name="xaxis">
+			<param name="xaxis_select" type="select" label="Spacing of x-axis intervals">
+				<option value="TRUE">Uniform lengths for Mb</option>
+				<option value="FALSE">Chromosome-scaled lengths for Mb</option>
+			</param>
+			<when value="TRUE">
+				<param name="true" type="hidden" value="TRUE" label="Uniform spacing of the x-axis based on Mb" help="Scale of x-axis (in Mb) is fixed for the scatter plots and frequency plots across all chromosomes"/>
+			</when>
+			<when value="FALSE">
+				<param name="false" type="hidden" value="FALSE" label="Variable spacing of the x-axis based on chromosome lengths" help="Scale of x-axis (in Mb) is dependent on chromosome length for the scatter plots and frequency plots for all chromosomes"/>
+			</when>
+			</conditional>
+
+		<param type="integer" name="bsize" value="1000000" label="Bin-size for frequency plots of homozygous variants" help=""/>
+		<param type="text" name="freqthr" value="0.0-1.0" label="Limits of allele ratios for variant to be considered homozygous" help="For frequency plots"/>
+		
+		<conditional name="binnorm">
+			<param name="binnorm_select" type="select" label="Normalization of y-axis in frequency plots">
+				<option value="FALSE">Actual Frequency</option>
+				<option value="TRUE">Normalized Frequency</option>
+			</param>
+			<when value="TRUE">
+				<param name="true" type="hidden" value="TRUE" label="Normalized y-axis frequency values based on formula" help="Normalisation formula as in cloudmap paper"/>
+			</when>
+			<when value="FALSE">
+				<param name="false" type="hidden" value="FALSE" label="Original frequency y-axis values" help=" "/>
+			</when>
+			</conditional>
+
+		<conditional name="exclfiles">
+			<param name="exclfiles_select" type="select" label="Additional exclusion of variants by subtraction">
+				<option value="FALSE">No</option>
+				<option value="TRUE">Yes</option>
+			</param>	
+			<when value="FALSE">
+				<param name="false" type="hidden" value="FALSE" label="No additional variant subtraction" help=""/>
+			</when>
+			<when value="TRUE">
+				<param name="true" type="data" format="tabular" label="Select variant lists to subtract from your history" help="Requires .txt file with tab-delimited columns for CHR POS REF ALT. Recommend directly using the tables generated by this tool from other samples or as a reference for the required format"/>			
+			</when>
+		</conditional>	
+			
+		<param type="text" name="exclcol" value="green" label="Colour of original Loess curve (before additional variant subtraction)" help="See below for list of supported colors"/>
+	</inputs>
+
+	<outputs>
+		<data name="outn" format="txt"/>
+		<data name="pdfn" format="pdf"/>
+	</outputs>
+
+
+	<tests>
+		<test>
+			<param name="inf" value="nor22.vcf"/>
+			<output name="outn" file="output.txt"/>
+		</test>
+	</tests>
+
+	<help><![CDATA[
+
+**What it does** 
+
+This tool generates plots based on variant allele ratios along the genome to facilitate mutation mapping using the Variant Discovery Mapping approach (a form of bulk segregant linkage analysis). It has a few basic filtering options built-in for improved functionality as it serves as the last step in the VDM pipeline [ref]. 
+
+Note that this tool is only compatible with VCF file generated by Freebayes variant calling and annotated with SnpEff. 
+
+It is a replacement for the deprecated cloudmap pipeline and plotting tool. For more details and see paper [link pending].
+
+A standalone R version of this code with additional data/metric outputs can be found at https://github.com/fxce/vdm_plot.
+
+ 
+
+------ 
+
+**OUTPUT** 
+
+------ 
+
+This tool generates a text file and a pdf file. 
+
+The text file lists the variants that have passed quality filtering along with information on position, allele ratio, details on the mutation, and gene annotation (as parsed from the Freebayes VCF INFO and SnpEff annotation). This offers an easy reference to check for variants after identifying a region of interest from the plots and the parsed format facilitates sorting in e.g. Excel for ones that affect protein-coding. 
+
+The pdf consists of (1) scatterplots based on allele ratios and their corresponding Loess curves along each chromosome; (2) barplots for the frequency of homozygous (ratio=1) variants along each chromosome. 
+
+ 
+
+------ 
+
+**INPUT** 
+
+------ 
+
+**Select data** 
+
+Load VCF file from history (generated by Freebayes and then annotated with SnpEff). 
+
+ 
+
+**Select species** 
+
+The configuration of chromosomes and sizes needs to be known and this tool is preloaded with this information for for C. elegans, Zebrafish, Brachypodium, and Arabidopsis. To use a custom chromosome configuration, a table containing chromosome numbers in column 1 and their corresponding sizes in Mb in column 2 should be prepared and saved as a tab-delimited .txt file with no column header names, and no quotation marks. Once uploaded to your Galaxy workspace/history, selecting option 'other' will allow for your custom file to be loaded from your history and used instead. e.g. 
+
+CHR1   16 
+
+CHR2   17 
+
+ 
+
+**Filter Quality** 
+
+Filter by quality, only variants with greater or equal quality values will be retained. Normally, a filter of quality 200 produces a good balance between reliable data and a reasonable amount of variants for plotting although this is dependent on your dataset. It may be useful to test with other quality values (e.g. 100 and 300) if too few or too many variants pass the filter. 
+
+ 
+
+**How allele ratio is calculated** 
+
+Option 'AB' uses the AB value from parsed from the VCF INFO that is generated by Freebayes. Option 'AO/(AO+RO)' calculates this ratio value from AO and RO values parsed from the VCF INFO that is generated by Freebayes. 
+
+The AB should equal AO/(AO+RO) in almost all cases, this option is included because there are reports that AB deviates for variants with low quality. 
+
+ 
+
+**Type of variants** 
+
+A choice between using only SNP variants or using all variants. 
+
+(The main reason to consider looking at only SNP variants is that Freebayes identifies a number of variants to have arisen from complex substitution/insertion/deletion/other events and it is uncertain how reproducibly some of these long stretches of sequence would be reported across different sequencing reactions. If inconsistent, some of these might be missed by subtraction of the variants in the reference/background leading to false positives.) 
+
+ 
+
+**Loess span** 
+
+This parameter controls the degree of smoothing for the Loess curve and can have a value between 0 and 1, higher values will result in smoother curves. 
+
+ 
+
+**Colour of the scatterplot points** 
+
+Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf. 
+
+ 
+
+**Colour of Loess curve** 
+
+Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf. 
+
+ 
+
+**Spacing of x-axis** 
+
+'Uniform spacing ...' sets the x-axis to be scaled the identically, i.e. the interval for 1Mb, across all chromosomes and for both scatterplots and barplots. 'Variable spacing...' sets the x-axis to scale depending on the length of the chromosome being plotted. 
+
+ 
+
+**Bin-size** 
+
+Specifies how the frequency for homozygous variants is binned (in bp) and plotted in the barplot with the default 1000000 setting each bar to span 1Mb. 
+
+ 
+
+**Limits for allele ratio to be considered homozygous** 
+
+Variants with allele ratios below the lower value will be considered homozygous REF and above the upper value will be considered homozygous ALT, this option only affects the barplots for the frequency of homozygous variants along chromosomes. Default "0-1" means allele ratios of 0 is considered homozygous for the reference allele and 1 is considered homozygous for the alternate allele. Using <1.0 for the upper limit can be used to visualise the profile of near-homozygous variants and may be more informative if the frequency of homozygous variants (allele ratio=1.0) is low e.g. due to suspected contamination of pooled sample with non-homozygous lines.  
+
+ 
+
+**Normalized y-axis frequency values** 
+
+Select between the actual frequency values for homozygous variants in the barplots and normalized frequency. Normalized frequency applies the formula from the Cloudmap paper (Minevich et al., 2012 Genetics 192(4):1249-1269) to improve mapping signal, though it is more relevant for Hawaiian mapping. 
+
+ 
+
+**Additional exclusion of variants** 
+
+This provides the option for a user submitted list of variants to be subtracted from the sample list before plotting and is compatible with the variant table generated by this tool. The subtraction is based on matching variants with identical chromosomal location, reference nucleotide, and alternate nucleotide which are then removed. Therefore, formatting of the subtraction list requires columns for chromosome, position, reference nucleotide, and alternate nucleotide which are named 'CHR   POS   REF   ALT', e.g. (+RATIO) 
+
+CHR   POS   REF   ALT 
+
+III       12      G       A 
+
+ 
+
+Note that selecting multiple subtraction files here is not cumulative, only one variant list is subtracted from the sample each time before the an output is generated. Merging variant lists from different samples would have to be performed manually before being uploaded and selected as a subtraction list.  
+ 
+ 
+
+**Colour of original Loess curve** 
+
+Any colour names compatible with default R can be used, e.g. can refer to http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf. 
+
+ 
+
+
+]]> 
+	</help>
+</tool>