comparison snpSift_extractFields.xml @ 2:bf8c1526871b draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit d12355cea76843e3ed6f09d96c3e9fe22afe4a4f
author iuc
date Mon, 05 Dec 2016 12:11:18 -0500
parents 98708b88af9f
children 20c7d583fec1
comparison
equal deleted inserted replaced
1:98708b88af9f 2:bf8c1526871b
1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1">
2 <options sanitize="False" /> 2 <options sanitize="False" />
3 <description>from a VCF file inot a tabular file</description> 3 <description>from a VCF file into a tabular file</description>
4 <macros> 4 <macros>
5 <import>snpSift_macros.xml</import> 5 <import>snpSift_macros.xml</import>
6 </macros> 6 </macros>
7 <expand macro="requirements" /> 7 <expand macro="requirements" />
8 <expand macro="stdio" /> 8 <expand macro="stdio" />
9 <expand macro="version_command" /> 9 <expand macro="version_command" />
10 <command><![CDATA[ 10 <command><![CDATA[
11 @CONDA_SNPSIFT_JAR_PATH@ &&
11 cat "$input" 12 cat "$input"
12 #if $one_effect_per_line: 13 #if $one_effect_per_line:
13 | "\$SNPEFF_JAR_PATH/scripts/vcfEffOnePerLine.pl" 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl"
14 #end if 15 #end if
15 | java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" extractFields 16 | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields
16 #if $separator: 17 #if $separator:
17 -s '$separator' 18 -s '$separator'
18 #end if 19 #end if
19 #if $empty_text: 20 #if $empty_text:
20 -e '$empty_text' 21 -e '$empty_text'
21 #end if 22 #end if
22 - 23 -
23 #echo ' '.join(['"%s"' % x for x in $extract.split()]) 24 #echo ' '.join(['"%s"' % x for x in $extract.split()])
24 > "$output" 25 > "$output"
25 ]]> 26 ]]>
26 </command> 27 </command>
27 <inputs> 28 <inputs>
28 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> 29 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/>
29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> 30 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." />
30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> 31 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" />
31 <param name="separator" type="text" value="" optional="true" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values"> 32 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" />
32 </param> 33 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" />
33 <param name="empty_text" type="text" value="" optional="true" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" >
34 </param>
35 </inputs> 34 </inputs>
36 <outputs> 35 <outputs>
37 <data format="tabular" name="output" /> 36 <data format="tabular" name="output" />
38 </outputs> 37 </outputs>
39 <tests> 38 <tests>
76 CHROM 75 CHROM
77 POS 76 POS
78 ID 77 ID
79 REF 78 REF
80 ALT 79 ALT
81 FILTER 80 FILTER
82 INFO fields: 81 INFO fields:
83 AF 82 AF
84 AC 83 AC
85 DP 84 DP
86 MQ 85 MQ
87 etc. (any info field available) 86 etc. (any info field available)
88 SnpEff 'ANN' fields: 87 SnpEff 'ANN' fields:
89 "ANN[*].ALLELE" (alias GENOTYPE) 88 "ANN[*].ALLELE" (alias GENOTYPE)
90 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) 89 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.)
91 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } 90 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER }
92 "ANN[*].GENE" Gene name (e.g. 'PSD3') 91 "ANN[*].GENE" Gene name (e.g. 'PSD3')
102 "ANN[*].CDS_POS" (alias POS_CDS) 101 "ANN[*].CDS_POS" (alias POS_CDS)
103 "ANN[*].CDS_LEN" (alias LEN_CDS) 102 "ANN[*].CDS_LEN" (alias LEN_CDS)
104 "ANN[*].AA_POS" (alias POS_AA) 103 "ANN[*].AA_POS" (alias POS_AA)
105 "ANN[*].AA_LEN" (alias LEN_AA) 104 "ANN[*].AA_LEN" (alias LEN_AA)
106 "ANN[*].DISTANCE" 105 "ANN[*].DISTANCE"
107 "ANN[*].ERRORS" (alias WARNING, INFOS) 106 "ANN[*].ERRORS" (alias WARNING, INFOS)
108 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions; newer versions use the 'ANN' field): 107 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions; newer versions use the 'ANN' field):
109 "EFF[*].EFFECT" 108 "EFF[*].EFFECT"
110 "EFF[*].IMPACT" 109 "EFF[*].IMPACT"
111 "EFF[*].FUNCLASS" 110 "EFF[*].FUNCLASS"
112 "EFF[*].CODON" 111 "EFF[*].CODON"
114 "EFF[*].AA_LEN" 113 "EFF[*].AA_LEN"
115 "EFF[*].GENE" 114 "EFF[*].GENE"
116 "EFF[*].BIOTYPE" 115 "EFF[*].BIOTYPE"
117 "EFF[*].CODING" 116 "EFF[*].CODING"
118 "EFF[*].TRID" 117 "EFF[*].TRID"
119 "EFF[*].RANK" 118 "EFF[*].RANK"
120 SnpEff 'LOF' fields: 119 SnpEff 'LOF' fields:
121 "LOF[*].GENE" 120 "LOF[*].GENE"
122 "LOF[*].GENEID" 121 "LOF[*].GENEID"
123 "LOF[*].NUMTR" 122 "LOF[*].NUMTR"
124 "LOF[*].PERC" 123 "LOF[*].PERC"
125 SnpEff 'NMD' fields: 124 SnpEff 'NMD' fields:
126 "NMD[*].GENE" 125 "NMD[*].GENE"
127 "NMD[*].GENEID" 126 "NMD[*].GENEID"
128 "NMD[*].NUMTR" 127 "NMD[*].NUMTR"
129 "NMD[*].PERC" 128 "NMD[*].PERC"
130 129
131 130
132 Some examples: 131 Some examples:
133 132
134 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* 133 - *Extracting chromosome, position, ID and allele frequency from a VCF file:*
135 134
136 **CHROM POS ID AF** 135 **CHROM POS ID AF**
137 136
138 The result will look something like: 137 The result will look something like:
139 138
140 :: 139 ::
141 140
142 #CHROM POS ID AF 141 #CHROM POS ID AF
143 1 69134 0.086 142 1 69134 0.086
153 - CHROM POS ID: regular fields (as in the previous example) 152 - CHROM POS ID: regular fields (as in the previous example)
154 - THETA : This one is from INFO 153 - THETA : This one is from INFO
155 - GEN[0].GL[1] : Second likelihood from first genotype 154 - GEN[0].GL[1] : Second likelihood from first genotype
156 - GEN[1].GL : The whole GL field (all entries without separating them) 155 - GEN[1].GL : The whole GL field (all entries without separating them)
157 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). 156 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one).
158 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). 157 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated).
159 158
160 The result will look something like: 159 The result will look something like:
161 160
162 :: 161 ::
163 162
164 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT 163 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT
165 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 164 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1
166 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 165 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0
167 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 166 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0
168 167
169 - *Extracting fields with multiple values:* 168 - *Extracting fields with multiple values:*
170 (notice that there are multiple effect columns per line because there are multiple effects per variant) 169 (notice that there are multiple effect columns per line because there are multiple effects per variant)
171 170
172 **CHROM POS REF ALT ANN[*].EFFECT** 171 **CHROM POS REF ALT ANN[*].EFFECT**
173 172
174 The result will look something like: 173 The result will look something like:
175 174
176 :: 175 ::
177 176
178 #CHROM POS REF ALT ANN[*].EFFECT 177 #CHROM POS REF ALT ANN[*].EFFECT
179 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant 178 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant
180 22 17072035 C T missense_variant downstream_gene_variant 179 22 17072035 C T missense_variant downstream_gene_variant
181 22 17072258 C A missense_variant downstream_gene_variant 180 22 17072258 C A missense_variant downstream_gene_variant
182 181
183 - *Extracting fields with multiple values using a comma as a multiple field separator:* 182 - *Extracting fields with multiple values using a comma as a multiple field separator:*
184 183
185 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** 184 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P**
186 185
187 The result will look something like: 186 The result will look something like:
188 187
189 :: 188 ::
190 189
191 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P 190 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P
192 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. 191 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,.
193 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. 192 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,.
194 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. 193 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,.
196 195
197 - *Extracting fields with multiple values, one effect per line:* 196 - *Extracting fields with multiple values, one effect per line:*
198 197
199 **CHROM POS REF ALT ANN[*].EFFECT** 198 **CHROM POS REF ALT ANN[*].EFFECT**
200 199
201 The result will look something like: 200 The result will look something like:
202 201
203 :: 202 ::
204 203
205 #CHROM POS REF ALT ANN[*].EFFECT 204 #CHROM POS REF ALT ANN[*].EFFECT
206 22 17071756 T C 3_prime_UTR_variant 205 22 17071756 T C 3_prime_UTR_variant
207 22 17071756 T C downstream_gene_variant 206 22 17071756 T C downstream_gene_variant
208 22 17072035 C T missense_variant 207 22 17072035 C T missense_variant