Mercurial > repos > greg > vsnp_add_zero_coverage
diff vsnp_add_zero_coverage.xml @ 0:3cb0bf7e1b2d draft
Uploaded
author | greg |
---|---|
date | Tue, 21 Apr 2020 09:44:38 -0400 |
parents | |
children | 01312f8a6ca9 |
line wrap: on
line diff
<tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="1.0.0">
    <!--
        Galaxy wrapper around vsnp_add_zero_coverage.py.
        Inputs are either one BAM plus one VCF, or a list collection of each,
        together with a reference genome (locally cached or from the history).
        Outputs are a VCF and a metrics file per input pair.
    -->
    <description></description>
    <requirements>
        <requirement type="package" version="1.76">biopython</requirement>
        <requirement type="package" version="1.16.5">numpy</requirement>
        <requirement type="package" version="0.25.3">pandas</requirement>
        <requirement type="package" version="0.15.4">pysam</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
#import os
#import re
#set input_type = $input_type_cond.input_type
## Fixed working directory names.  The output directory names must match the
## "directory" attributes of the discover_datasets tags in the outputs section.
#set input_bam_dir = 'input_bam_dir'
#set input_vcf_dir = 'input_vcf_dir'
#set output_vcf_dir = 'output_vcf_dir'
#set output_metrics_dir = 'output_metrics_dir'
mkdir -p $input_bam_dir &&
mkdir -p $input_vcf_dir &&
mkdir -p $output_vcf_dir &&
mkdir -p $output_metrics_dir &&
## Stage the inputs as symlinks.  Every path is single-quoted so that a path or
## an element identifier containing whitespace stays a single shell argument.
#if str($input_type) == "single":
    #set bam_input = $input_type_cond.bam_input
    #set file_name = $bam_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_bam_dir/$file_name_base' &&
    #set vcf_input = $input_type_cond.vcf_input
    #set file_name = $vcf_input.file_name
    #set file_name_base = $os.path.basename($file_name)
    ln -s '$file_name' '$input_vcf_dir/$file_name_base' &&
#else:
    ## Collection elements are linked under their sanitized element identifier:
    ## every character that is not whitespace, a word character or a hyphen
    ## becomes an underscore (whitespace is kept, hence the quoting below).
    #for $i in $input_type_cond.bam_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_bam_dir/$identifier' &&
    #end for
    #for $i in $input_type_cond.vcf_input_collection:
        #set filename = $i.file_name
        #set identifier = re.sub('[^\s\w\-]', '_', str($i.element_identifier))
        ln -s '$filename' '$input_vcf_dir/$identifier' &&
    #end for
#end if
## NOTE(review): the staging directories are not passed on the command line, so
## the script presumably uses the same hard-coded directory names - confirm
## against vsnp_add_zero_coverage.py.
python '$__tool_directory__/vsnp_add_zero_coverage.py'
--processes $processes
#if str($reference_cond.reference_source) == "cached":
    --reference '$reference_cond.reference.fields.path'
#else:
    --reference '$reference_cond.reference'
#end if
## Explicit output paths are only passed in single-file mode; in collection
## mode the outputs are discovered from the output directories.
#if str($input_type) == "single":
    --output_metrics '$output_metrics'
    --output_vcf '$output_vcf'
#end if
]]></command>
    <inputs>
        <!-- Single BAM/VCF pair, or list collections of BAM and VCF files. -->
        <conditional name="input_type_cond">
            <param name="input_type" type="select" label="Choose the category of the files to be analyzed">
                <option value="single" selected="true">Single files</option>
                <option value="collection">Collections of files</option>
            </param>
            <when value="single">
                <param name="bam_input" type="data" format="bam" label="BAM file">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input" type="data" format="vcf" label="VCF file">
                    <validator type="unspecified_build"/>
                </param>
            </when>
            <when value="collection">
                <param name="bam_input_collection" type="data_collection" format="bam" collection_type="list" label="Collection of BAM files">
                    <validator type="unspecified_build"/>
                </param>
                <param name="vcf_input_collection" type="data_collection" format="vcf" collection_type="list" label="Collection of VCF files">
                    <validator type="unspecified_build"/>
                </param>
            </when>
        </conditional>
        <!-- Reference genome: an entry of the fasta_indexes data table or a fasta dataset from the history. -->
        <conditional name="reference_cond">
            <param name="reference_source" type="select" label="Choose the source for the reference genome">
                <option value="cached" selected="true">locally cached</option>
                <option value="history">from history</option>
            </param>
            <when value="cached">
                <param name="reference" type="select" label="Using reference genome">
                    <options from_data_table="fasta_indexes"/>
                    <!-- No <filter> tag here! -->
                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
                </param>
            </when>
            <when value="history">
                <param name="reference" type="data" format="fasta" label="Using reference genome">
                    <validator type="no_options" message="The current history does not include a fasta dataset"/>
                </param>
            </when>
        </conditional>
        <param name="processes" type="integer" min="1" max="20" value="8" label="Number of processes for job splitting"/>
    </inputs>
    <outputs>
        <!-- The single datasets are produced in single-file mode, the collections in collection mode. -->
        <data name="output_vcf" format="vcf" label="${tool.name} (filtered VCF) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_vcf_collection" type="list" label="${tool.name} (filtered VCFs) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_vcf_dir" format="vcf" />
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
        <data name="output_metrics" format="tabular" label="${tool.name} (metrics) on ${on_string}">
            <filter>input_type_cond['input_type'] == 'single'</filter>
        </data>
        <collection name="output_metrics_collection" type="list" label="${tool.name} (metrics) on ${on_string}">
            <discover_datasets pattern="__name__" directory="output_metrics_dir" format="tabular" />
            <filter>input_type_cond['input_type'] == 'collection'</filter>
        </collection>
    </outputs>
    <tests>
        <!-- Collection mode with a reference from the history. -->
        <test>
            <param name="input_type" value="collection"/>
            <param name="bam_input_collection">
                <collection type="list">
                    <element name="bam_input.bam" value="bam_input.bam" dbkey="89"/>
                    <element name="bam_input2.bam" value="bam_input2.bam" dbkey="89"/>
                </collection>
            </param>
            <param name="vcf_input_collection">
                <collection type="list">
                    <element name="vcf_input.vcf" value="vcf_input.vcf" dbkey="89"/>
                    <element name="vcf_input2.vcf" value="vcf_input2.vcf" dbkey="89"/>
                </collection>
            </param>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <output_collection name="output_vcf_collection" type="list">
                <element name="vcf_input.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
                <element name="vcf_input2.vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            </output_collection>
            <output_collection name="output_metrics_collection" type="list">
                <element name="vcf_input.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
                <element name="vcf_input2.tabular" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
            </output_collection>
        </test>
        <!-- Single-file mode with a reference from the history. -->
        <test>
            <param name="input_type" value="single"/>
            <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
            <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
            <param name="reference_source" value="history"/>
            <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
            <!-- output_vcf is a tool output, so it is checked with an output tag rather than supplied as a param. -->
            <output name="output_vcf" file="output_vcf.vcf" ftype="vcf" compare="contains"/>
            <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
        </test>
    </tests>
    <help>
**What it does**

Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
along the reference with no coverage.

A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
coverage percentage.

**Required Options**

 * **Choose the category of the files to be analyzed** - select "Single files" or "Collections of files", then select the appropriate history items (single BAM and VCF files or collections of BAM and VCF files) based on the selected option.
 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
 * **Number of processes for job splitting** - Select the number of processes for splitting the job to shorten execution time.
    </help>
    <citations>
        <citation type="bibtex">
            @misc{None,
            journal = {None},
            author = {1. Stuber T},
            title = {Manuscript in preparation},
            year = {None},
            url = {https://github.com/USDA-VS/vSNP},}
        </citation>
    </citations>
</tool>