Mercurial > repos > iuc > snpsift
comparison snpSift_extractFields.xml @ 3:20c7d583fec1 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit fbc18d9128669e461e76ed13307ee88dd774afa5
author | iuc |
---|---|
date | Mon, 12 Jun 2017 10:25:32 -0400 |
parents | bf8c1526871b |
children | 09d6806c609e |
comparison
equal
deleted
inserted
replaced
2:bf8c1526871b | 3:20c7d583fec1 |
---|---|
1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.1"> | 1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> |
2 <options sanitize="False" /> | 2 <options sanitize="False" /> |
3 <description>from a VCF file into a tabular file</description> | 3 <description>from a VCF file into a tabular file</description> |
4 <macros> | 4 <macros> |
5 <import>snpSift_macros.xml</import> | 5 <import>snpSift_macros.xml</import> |
6 </macros> | 6 </macros> |
7 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
8 <expand macro="stdio" /> | 8 <expand macro="stdio" /> |
9 <expand macro="version_command" /> | 9 <expand macro="version_command" /> |
10 <command><![CDATA[ | 10 <command><![CDATA[ |
11 @CONDA_SNPSIFT_JAR_PATH@ && | 11 @CONDA_SNPSIFT_JAR_PATH@ && |
12 cat "$input" | 12 cat '$input' |
13 #if $one_effect_per_line: | 13 #if $one_effect_per_line: |
14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" | 14 | "\$SNPSIFT_JAR_PATH/scripts/vcfEffOnePerLine.pl" |
15 #end if | 15 #end if |
16 | java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" extractFields | 16 | SnpSift -Xmx6G extractFields |
17 #if $separator: | 17 #if $separator: |
18 -s '$separator' | 18 -s '$separator' |
19 #end if | 19 #end if |
20 #if $empty_text: | 20 #if $empty_text: |
21 -e '$empty_text' | 21 -e '$empty_text' |
22 #end if | 22 #end if |
23 - | 23 - |
24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) | 24 #echo ' '.join(['"%s"' % x for x in $extract.split()]) |
25 > "$output" | 25 > '$output' |
26 ]]> | 26 ]]></command> |
27 </command> | |
28 <inputs> | 27 <inputs> |
29 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | 28 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> |
30 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> | 29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> |
31 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> | 30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across mutiple lines" /> |
32 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> | 31 <param name="separator" type="text" value="" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values" /> |
33 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> | 32 <param name="empty_text" type="text" value="" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" /> |
34 </inputs> | 33 </inputs> |
35 <outputs> | 34 <outputs> |
36 <data format="tabular" name="output" /> | 35 <data name="output" format="tabular" /> |
37 </outputs> | 36 </outputs> |
38 <tests> | 37 <tests> |
39 <test> | 38 <test> |
40 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> | 39 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> |
41 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> | 40 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> |
55 <assert_contents> | 54 <assert_contents> |
56 <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> | 55 <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> |
57 </assert_contents> | 56 </assert_contents> |
58 </output> | 57 </output> |
59 </test> | 58 </test> |
60 | |
61 </tests> | 59 </tests> |
62 <help><![CDATA[ | 60 <help><![CDATA[ |
63 | |
64 **SnpSift Extract Fields** | 61 **SnpSift Extract Fields** |
65 | 62 |
66 Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. | 63 Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. |
67 | 64 |
68 http://snpeff.sourceforge.net/SnpSift.html#Extract | 65 http://snpeff.sourceforge.net/SnpSift.html#Extract |
69 | 66 |
70 You can also use sub-fields and genotype fields / sub-fields such as: | 67 You can also use sub-fields and genotype fields / sub-fields such as:: |
71 | |
72 :: | |
73 | 68 |
74 Standard VCF fields: | 69 Standard VCF fields: |
75 CHROM | 70 CHROM |
76 POS | 71 POS |
77 ID | 72 ID |
125 "NMD[*].GENE" | 120 "NMD[*].GENE" |
126 "NMD[*].GENEID" | 121 "NMD[*].GENEID" |
127 "NMD[*].NUMTR" | 122 "NMD[*].NUMTR" |
128 "NMD[*].PERC" | 123 "NMD[*].PERC" |
129 | 124 |
130 | |
131 Some examples: | 125 Some examples: |
132 | 126 |
133 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* | 127 - *Extracting chromosome, position, ID and allele frequency from a VCF file*: |
134 | 128 |
135 **CHROM POS ID AF** | 129 **CHROM POS ID AF** |
136 | 130 |
137 The result will look something like: | 131 The result will look something like:: |
138 | 132 |
139 :: | 133 #CHROM POS ID AF |
134 1 69134 0.086 | |
135 1 69496 rs150690004 0.001 | |
140 | 136 |
141 #CHROM POS ID AF | 137 - *Extracting genotype fields*: |
142 1 69134 0.086 | |
143 1 69496 rs150690004 0.001 | |
144 | 138 |
139 **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** | |
145 | 140 |
146 - *Extracting genotype fields:* | 141 This means to extract: |
147 | 142 |
148 **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** | 143 - CHROM POS ID: regular fields (as in the previous example) |
144 - THETA : This one is from INFO | |
145 - GEN[0].GL[1] : Second likelihood from first genotype | |
146 - GEN[1].GL : The whole GL field (all entries without separating them) | |
147 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | |
148 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | |
149 | 149 |
150 This means to extract: | 150 The result will look something like:: |
151 | 151 |
152 - CHROM POS ID: regular fields (as in the previous example) | 152 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT |
153 - THETA : This one is from INFO | 153 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 |
154 - GEN[0].GL[1] : Second likelihood from first genotype | 154 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 |
155 - GEN[1].GL : The whole GL field (all entries without separating them) | 155 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 |
156 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | |
157 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | |
158 | 156 |
159 The result will look something like: | 157 - *Extracting fields with multiple values*: |
158 (notice that there are multiple effect columns per line because there are multiple effects per variant) | |
160 | 159 |
161 :: | 160 **CHROM POS REF ALT ANN[*].EFFECT** |
162 | 161 |
163 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT | 162 The result will look something like:: |
164 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 | |
165 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 | |
166 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 | |
167 | 163 |
168 - *Extracting fields with multiple values:* | 164 #CHROM POS REF ALT ANN[*].EFFECT |
169 (notice that there are multiple effect columns per line because there are multiple effects per variant) | 165 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant |
166 22 17072035 C T missense_variant downstream_gene_variant | |
167 22 17072258 C A missense_variant downstream_gene_variant | |
170 | 168 |
171 **CHROM POS REF ALT ANN[*].EFFECT** | 169 - *Extracting fields with multiple values using a comma as a multiple field separator:* |
172 | 170 |
173 The result will look something like: | 171 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** |
174 | 172 |
175 :: | 173 The result will look something like:: |
176 | 174 |
177 #CHROM POS REF ALT ANN[*].EFFECT | 175 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P |
178 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant | 176 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. |
179 22 17072035 C T missense_variant downstream_gene_variant | 177 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. |
180 22 17072258 C A missense_variant downstream_gene_variant | 178 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. |
181 | 179 |
182 - *Extracting fields with multiple values using a comma as a multiple field separator:* | 180 - *Extracting fields with multiple values, one effect per line:* |
183 | 181 |
184 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** | 182 **CHROM POS REF ALT ANN[*].EFFECT** |
185 | 183 |
186 The result will look something like: | 184 The result will look something like:: |
187 | 185 |
188 :: | 186 #CHROM POS REF ALT ANN[*].EFFECT |
189 | 187 22 17071756 T C 3_prime_UTR_variant |
190 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P | 188 22 17071756 T C downstream_gene_variant |
191 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. | 189 22 17072035 C T missense_variant |
192 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. | 190 22 17072035 C T downstream_gene_variant |
193 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. | 191 22 17072258 C A missense_variant |
194 | 192 22 17072258 C A downstream_gene_variant |
195 | |
196 - *Extracting fields with multiple values, one effect per line:* | |
197 | |
198 **CHROM POS REF ALT ANN[*].EFFECT** | |
199 | |
200 The result will look something like: | |
201 | |
202 :: | |
203 | |
204 #CHROM POS REF ALT ANN[*].EFFECT | |
205 22 17071756 T C 3_prime_UTR_variant | |
206 22 17071756 T C downstream_gene_variant | |
207 22 17072035 C T missense_variant | |
208 22 17072035 C T downstream_gene_variant | |
209 22 17072258 C A missense_variant | |
210 22 17072258 C A downstream_gene_variant | |
211 | |
212 | 193 |
213 @EXTERNAL_DOCUMENTATION@ | 194 @EXTERNAL_DOCUMENTATION@ |
214 http://snpeff.sourceforge.net/SnpSift.html#Extract | 195 - http://snpeff.sourceforge.net/SnpSift.html#Extract |
215 | 196 ]]></help> |
216 ]]> | |
217 </help> | |
218 <expand macro="citations" /> | 197 <expand macro="citations" /> |
219 </tool> | 198 </tool> |