annotate vsnp_add_zero_coverage.xml @ 6:bb6cc994707d draft

Uploaded
author greg
date Thu, 22 Jul 2021 17:53:31 +0000
parents 01312f8a6ca9
children 127a00fcd4a8
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
bb6cc994707d Uploaded
greg
parents: 2
diff changeset
1 <tool id="vsnp_add_zero_coverage" name="vSNP: add zero coverage" version="@WRAPPER_VERSION@.2+galaxy0" profile="@PROFILE@">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
2 <description></description>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
3 <macros>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
4 <import>macros.xml</import>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
5 </macros>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
6 <requirements>
6
bb6cc994707d Uploaded
greg
parents: 2
diff changeset
7 <requirement type="package" version="3.0.7">openpyxl</requirement>
bb6cc994707d Uploaded
greg
parents: 2
diff changeset
8 <requirement type="package" version="1.79">biopython</requirement>
bb6cc994707d Uploaded
greg
parents: 2
diff changeset
9 <requirement type="package" version="1.3.0">pandas</requirement>
bb6cc994707d Uploaded
greg
parents: 2
diff changeset
10 <requirement type="package" version="0.16.0.1">pysam</requirement>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
11 </requirements>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
12 <command detect_errors="exit_code"><![CDATA[
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
13 #import re
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
14
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
15 ## The identifier for both of the following files is likely the same
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
16 ## string, so we append a file extension to allow for both links.
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
17 #set bam_identifier = re.sub('[^\s\w\-]', '_', str($bam_input.element_identifier)) + '.bam'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
18 ln -s '${bam_input}' '${bam_identifier}' &&
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
19 #set vcf_identifier = re.sub('[^\s\w\-]', '_', str($vcf_input.element_identifier)) + '.vcf'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
20 ln -s '${vcf_input}' '${vcf_identifier}' &&
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
21
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
22 python '$__tool_directory__/vsnp_add_zero_coverage.py'
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
23 --bam_input '$bam_identifier'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
24 --vcf_input '$vcf_identifier'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
25 #if str($reference_cond.reference_source) == 'cached'
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
26 --reference '$reference_cond.reference.fields.path'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
27 #else:
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
28 --reference '$reference_cond.reference'
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
29 #end if
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
30 --output_metrics '$output_metrics'
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
31 --output_vcf '$output_vcf'
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
32 ]]></command>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
33 <inputs>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
34 <param name="bam_input" type="data" format="bam" label="BAM file"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
35 <param name="vcf_input" type="data" format="vcf" label="VCF file"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
36 <conditional name="reference_cond">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
37 <expand macro="param_reference_source"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
38 <when value="cached">
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
39 <param name="reference" type="select" label="Using reference genome">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
40 <options from_data_table="fasta_indexes">
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
41 <filter type="data_meta" column="1" key="dbkey" ref="bam_input"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
42 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected BAM file"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
43 </options>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
44 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
45 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
46 <when value="history">
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
47 <param name="reference" type="data" format="fasta,fasta.gz" label="Using reference genome">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
48 <validator type="no_options" message="The current history does not include a fasta dataset"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
49 </param>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
50 </when>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
51 </conditional>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
52 </inputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
53 <outputs>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
54 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string} (filtered VCF)"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
55 <data name="output_metrics" format="tabular" label="${tool.name} on ${on_string} (metrics)"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
56 </outputs>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
57 <tests>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
58 <test expect_num_outputs="2">
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
59 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
60 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
61 <param name="reference_source" value="history"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
62 <param name="reference" value="NC_002945v4.fasta" ftype="fasta"/>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
63 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
64 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
65 </test>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
66 <test expect_num_outputs="2">
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
67 <param name="bam_input" value="bam_input.bam" ftype="bam" dbkey="89"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
68 <param name="vcf_input" value="vcf_input.vcf" ftype="vcf" dbkey="89"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
69 <param name="reference_source" value="cached"/>
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
70 <output name="output_vcf" value="output_vcf.vcf" ftype="vcf" compare="contains"/>
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
71 <output name="output_metrics" file="output_metrics.tabular" ftype="tabular" compare="contains"/>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
72 </test>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
73 </tests>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
74 <help>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
75 **What it does**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
76
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
77 Accepts a combination of single BAM and associated VCF files (or associated collections of each) to produce a VCF file for each
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
78 combination whose positions with no coverage are represented as "N". These outputs are restricted to SNPs and those regions
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
79 along the reference with no coverage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
80
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
81 A metrics file is produced for each combination which provides the number of good SNPs, the average coverage and the genome
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
82 coverage percentage.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
83
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
84 **Required Options**
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
85
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
86 * **Choose the source for the reference genome** - select "locally cached" if the reference associated with the BAM and VCF files is available within the Galaxy environment or "from history" to select the reference from the current history.
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
87 </help>
2
01312f8a6ca9 Uploaded
greg
parents: 0
diff changeset
88 <expand macro="citations" />
0
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
89 </tool>
3cb0bf7e1b2d Uploaded
greg
parents:
diff changeset
90