annotate gemini_annotate.xml @ 2:8da05bf2b1d1 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
author iuc
date Thu, 09 Nov 2017 13:18:45 -0500
parents 685b3408c181
children 5bcaca8085bd
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
1 <tool id="gemini_@BINARY@" name="GEMINI @BINARY@" version="@VERSION@.1">
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
2 <description>adding your own custom annotations</description>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
3 <macros>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
4 <import>gemini_macros.xml</import>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
5 <token name="@BINARY@">annotate</token>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
6 </macros>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
7 <expand macro="requirements" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
8 <expand macro="stdio" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
9 <expand macro="version_command" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
10 <command>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
11 <![CDATA[
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
12
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
13 ## For GEMINI to work correctly, tabixed file must have form [name].[bed|vcf].gz
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
14 #set $tabixed_file = "tabixed.%s.gz" % $annotate_source.ext
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
15 bgzip -c "$annotate_source" > $tabixed_file &&
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
16 tabix -p "$annotate_source.ext" $tabixed_file &&
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
17
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
18 gemini @BINARY@
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
19 -f $tabixed_file
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
20 -c $column_name
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
21 -a $a.a_selector
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
22 #if $a.a_selector == 'extract':
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
23 -t $a.column_type
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
24 -e $a.column_extracts
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
25 -o $a.operation
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
26 #end if
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
27 $region_only
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
28 "${ infile }"
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
29 > "${ outfile }"
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
30 ]]>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
31
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
32 </command>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
33 <inputs>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
34 <expand macro="infile" />
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
35 <param name="annotate_source" type="data" format="vcf,bed" label="File containing the annotations in BED/VCF format" help="(-f)"/>
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
36
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
37 <param name="column_name" type="text" value=""
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
38 label="The name of the column to be added to the variant table"
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
39 help=" If the input file is a VCF, then this is the name of the info field to pull. (-c)">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
40 <sanitizer invalid_char=" ">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
41 <valid initial="string.letters,string.digits">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
42 <add value="_" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
43 </valid>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
44 </sanitizer>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
45 </param>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
46 <conditional name="a">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
47 <param name="a_selector" type="select" label="How should the annotation file be used?" help="(-a)">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
48 <option value="boolean">Did a variant overlap a region or not? (boolean)</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
49 <option value="count">How many regions did a variant overlap? (count)</option>
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
50 <option value="extract" selected="True">Extract specific values from a BED/VCF file. (extract)</option>
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
51 </param>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
52 <when value="extract">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
53
2
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
54 <param name="column_extracts" label="Column to extract information from for list annotations. For BED files, this is the column number. For VCF files, this is the name of the INFO field."
8da05bf2b1d1 planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/gemini commit 11ee7ac206d41894c0b6a11f2439aaea490824f0
iuc
parents: 0
diff changeset
55 type="text" force_select="true" help="(-e)"/>
0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
56
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
57
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
58 <param name="column_type" type="select" label="What data type(s) should be used to represent the new values in the database?"
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
59 help="(-t)">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
60 <option value="float">Decimal precision number (float)</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
61 <option value="integer">Integer number (integer)</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
62 <option value="text">Text columns such as “valid”, “yes” (text)</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
63 </param>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
64
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
65 <param name="operation" type="select" label="Operation to apply to the extract column values ..."
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
66 help="in the event that a variant overlaps multiple annotations in your annotation file. (-o)">
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
67 <option value="mean">Compute the average of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
68 <option value="sum">Compute the sum of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
69 <option value="median">Compute the median of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
70 <option value="min">Compute the minimum of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
71 <option value="max">Compute the maximum of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
72 <option value="mode">Compute the mode (most frequent value) of the (numeric) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
73 <option value="first">Use the value from the first record in the annotation file</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
74 <option value="last">Use the value from the last record in the annotation file</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
75 <option value="list">Create a comma-separated list of the observed (text) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
76 <option value="uniq_list">Create a comma-separated list of non-redundant observed (text) values</option>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
77 </param>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
78
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
79 </when>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
80 <when value="boolean"/>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
81 <when value="count"/>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
82 </conditional>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
83 <param name="region_only" argument="--region-only" type="boolean" checked="false"
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
84 truevalue="--region-only" falsevalue=""
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
85 label="If set, only region coordinates will be considered when annotating variants."
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
86 help="The default is to annotate using region coordinates as well as REF and ALT
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
87 variant values. This option is only valid if annotation is a VCF file"/>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
88 </inputs>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
89 <outputs>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
90 <data name="outfile" format="tabular" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
91 </outputs>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
92 <tests>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
93 <test>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
94 <param name="infile" value="gemini_annotate_input.db" ftype="gemini.sqlite" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
95 <param name="annotate_source" value="anno.bed" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
96 <param name="a_selector" value="count" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
97 <param name="column_name" value="anno5" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
98 <output name="outfile" file="gemini_annotate_result.tabular" />
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
99 </test>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
100 </tests>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
101 <help><![CDATA[
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
102 **What it does**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
103
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
104 It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
105
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
106 **Details**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
107
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
108 It is inevitable that researchers will want to enhance the GEMINI framework with their own, custom annotations. GEMINI provides a sub-command called annotate for exactly this purpose. As long as you provide a tabix'ed annotation file in BED or VCF format, the annotate tool will, for each variant in the variants table, screen for overlaps in your annotation file and update one or more new columns in the variants table that you may specify on the command line. This is best illustrated by the following **example**.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
109
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
110 **Input files**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
111
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
112 Let’s assume you have already created a GEMINI database of a **VCF file** using the *load module*.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
113
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
114 Now, let’s imagine you have an annotated file in **BED format** (important.bed) that describes regions of the genome that are particularly relevant to your lab’s research. You would like to annotate in the GEMINI database which variants overlap these crucial regions. We want to store this knowledge in a new column in the variants table called important_variant that tracks whether a given variant overlapped (1) or did not overlap (0) intervals in your annotation file.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
115
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
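116 *To do this on the command line, you must first TABIX your BED file. This Galaxy tool compresses (bgzip) and indexes (tabix) the annotation file for you.*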
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
117
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
118 **-a boolean - Did a variant overlap a region or not?**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
119
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
120 Now, you can use this *TABIX*’ed file to annotate which variants overlap your important regions. In the example below, the results will be stored in a new column called “important”. The **-a boolean** option says that you just want to track whether (1) or not (0) the variant overlapped one or more of your regions.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
121
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
122 Since a new column has been created in the database, we can now directly query the new column. In the example results below, the first and third variants overlapped a crucial region while the second did not::
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
123
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
124 chr22 100 101 1 1
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
125 chr22 200 201 2 0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
126 chr22 300 500 3 1
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
127
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
128 **-a count - How many regions did a variant overlap?**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
129
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
130 Instead of a simple yes or no, we can use the **-a count** option to count how many important regions a variant overlapped. It turns out that the 3rd variant actually overlapped two important regions::
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
131
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
132 chr22 100 101 1 1
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
133 chr22 200 201 2 0
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
134 chr22 300 500 3 2
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
135
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
136 **-a extract - Extract specific values from a BED file**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
137
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
138 Lastly, we may also extract values from specific fields in a BED file (or from the INFO field in a VCF) and populate one or more new columns in the database based on overlaps with the annotation file and the values of the fields therein. To do this, we use the **-a extract** option.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
139
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
140 This is best described with an example. To set this up, let’s imagine that we have a VCF file from a different experiment and we want to annotate the variants in our GEMINI database with the allele frequency and depth tags from the INFO fields for the same variants in this other VCF file.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
141
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
142 Now that we have a proper *TABIX*’ed VCF file, we can use the **-a extract** option to populate new columns in the GEMINI database. In order to do so, we must specify:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
143
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
144 1) the type of the new column (e.g., text, integer, float) (**-t**)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
145 2) the field in the INFO column of the VCF file that we should use to extract data with which to populate the new column (**-e**)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
146 3) what operation should be used to summarize the data in the event of multiple overlaps in the annotation file (**-o**)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
147 4) (optionally) the name of the column we want to add (**-c**), if this is not specified, it will use the value from **-e**.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
148
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
149 For example, let’s imagine we want to create a new column called “other_allele_freq” (**-c**) using the AF field in our VCF file to populate it.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
150
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
151 This creates a new column in my.db called other_allele_freq and this new column will be a FLOAT (**-t float**). In the event of multiple records in the VCF file overlapping a variant in the database, the average (**-o mean**) of the allele frequency values from the VCF file will be used.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
152
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
153 At this point, one can query the database based on the values of the new other_allele_freq column (using **GEMINI query**).
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
154
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
155 **-t TYPE - Specifying the column type(s) when using -a extract**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
156
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
157 The annotate tool will create three different types of columns via the **-t** option:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
158
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
159 1) Floating point columns for annotations with decimal precision as above (-t float)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
160 2) Integer columns for integral annotations (-t integer)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
161 3) Text columns for string columns such as “valid”, “yes”, etc. (-t text)
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
162
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
163 *The -t option is only valid when using the -a extract option.*
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
164
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
165 **-o OPERATION - Specifying the summary operations when using -a extract**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
166
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
167 In the event of multiple overlaps between a variant and records in the annotation file, the annotate tool can summarize the values observed with multiple options:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
168
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
169 - -o mean Compute the average of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
170 - -o median Compute the median of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
171 - -o min Compute the minimum of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
172 - -o max Compute the maximum of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
173 - -o mode Compute the mode (the most frequently observed value) of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
174 - -o first Use the value from the first record in the annotation file.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
175 - -o last Use the value from the last record in the annotation file.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
176 - -o list Create a comma-separated list of the observed values.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
177 - -o uniq_list Create a comma-separated list of the distinct observed values.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
178 - -o sum Compute the sum of the values. They must be numeric.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
179
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
180 The -o option is only valid when using the -a extract option.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
181
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
182 **Annotating with VCF**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
183
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
184 Most of the examples to this point have pulled a column from a tabix indexed BED file. It is likewise possible to pull from the INFO field of a tabix indexed VCF. The syntax is identical but the **-e** option will specify the names of fields in the INFO column to pull. By default, those field names will also be used as the names of the new columns, but different column names can still be specified with the **-c** option.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
185
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
186 To put a DP column in the db, set:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
187
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
188 -o list, -e DP, -t integer
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
189
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
190 ... and name it 'depth', set:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
191
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
192 -o list, -e DP, -c depth, -t integer
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
193
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
194
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
195 Missing values are allowed since we expect that in some cases an annotation VCF will not have all INFO fields specified for all variants.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
196
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
197 *We recommend decomposing and normalizing variants before annotating. For a detailed explanation of how to do this, see "Step 1. split, left-align, and trim variants" on the GEMINI* preprocessing_ *website.*
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
198
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
199 **Extracting and populating multiple columns at once**
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
200
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
201 One can also extract and populate multiple columns at once by providing comma-separated lists (no spaces) of column names (**-c**), types (**-t**), column numbers (**-e**), and summary operations (**-o**). For example, recall that in the VCF example above, we created a *TABIX*’ed BED file containing the allele frequency and depth values from the INFO field as the 4th and 5th columns in the BED, respectively.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
202
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
203 Instead of running the annotate tool twice (once for each column), we can run the tool once and load both columns in the same run. For example with settings:
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
204
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
205 - -a extract
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
206 - -c other_allele_freq,other_depth
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
207 - -t float,integer
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
208 - -e 4,5
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
209 - -o mean,max
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
210
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
211 We can then use each of the new columns to filter variants with a *GEMINI query*.
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
212
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
213 .. _preprocessing: https://gemini.readthedocs.org/en/latest/content/preprocessing.html#preprocess
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
214
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
215 ]]></help>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
216 <expand macro="citations"/>
685b3408c181 planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/gemini commit 4bbfca6f0e9cae9a8f263aad4eab7304c96358c4
iuc
parents:
diff changeset
217 </tool>