Mercurial > repos > iuc > snpsift
comparison snpSift_extractFields.xml @ 1:98708b88af9f draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift commit 21b46ae2c90ba7e569b2b3a9eaf938f8dedb2c31
author | iuc |
---|---|
date | Tue, 07 Jun 2016 10:04:09 -0400 |
parents | |
children | bf8c1526871b |
comparison
equal
deleted
inserted
replaced
0:9e8280e19338 | 1:98708b88af9f |
---|---|
1 <tool id="snpSift_extractFields" name="SnpSift Extract Fields" version="@WRAPPER_VERSION@.0"> | |
2 <options sanitize="False" /> | |
3 <description>from a VCF file into a tabular file</description> | |
4 <macros> | |
5 <import>snpSift_macros.xml</import> | |
6 </macros> | |
7 <expand macro="requirements" /> | |
8 <expand macro="stdio" /> | |
9 <expand macro="version_command" /> | |
<!--
  Command template (Cheetah) that builds a single shell pipeline:
    1. `cat` streams the VCF dataset ($input) to stdout.
    2. If the one_effect_per_line parameter is true, the stream is first
       piped through vcfEffOnePerLine.pl from the SnpEff scripts directory.
    3. The stream is piped into SnpSift extractFields (Java heap capped at 6G),
       which reads from stdin (the lone "-" argument).
       The "-s" option is added only when a separator was supplied, and
       the "-e" option only when an empty-field text was supplied.
    4. The value of the extract parameter is split on whitespace and every
       field name is wrapped in double quotes so that shell metacharacters
       such as [*] are passed through literally.
    5. Standard output is redirected to the $output dataset.
  NOTE(review): the tool sets sanitize="False" and the separator, empty_text
  and extract values are interpolated inside quotes without escaping; a value
  containing the matching quote character would presumably break the command
  line. Confirm whether escaping is needed.
-->
10 <command><![CDATA[ | |
11 cat "$input" | |
12 #if $one_effect_per_line: | |
13 | "\$SNPEFF_JAR_PATH/scripts/vcfEffOnePerLine.pl" | |
14 #end if | |
15 | java -Xmx6G -jar "\$SNPEFF_JAR_PATH/SnpSift.jar" extractFields | |
16 #if $separator: | |
17 -s '$separator' | |
18 #end if | |
19 #if $empty_text: | |
20 -e '$empty_text' | |
21 #end if | |
22 - | |
23 #echo ' '.join(['"%s"' % x for x in $extract.split()]) | |
24 > "$output" | |
25 ]]> | |
26 </command> | |
27 <inputs> | |
28 <param format="vcf" name="input" type="data" label="Variant input file in VCF format"/> | |
29 <param name="extract" type="text" label="Extract" help="Need help? See below a few examples." /> | |
30 <param name="one_effect_per_line" type="boolean" truevalue="yes" falsevalue="no" checked="false" label="One effect per line" help="When variants have more than one effect, lists one effect per line, while all other parameters in the line are repeated across multiple lines" /> | |
31 <param name="separator" type="text" value="" optional="true" label="multiple field separator" help="Separate multiple fields in one column with this character, e.g. a comma, rather than a column for each of the multiple values"> | |
32 </param> | |
33 <param name="empty_text" type="text" value="" optional="true" label="empty field text" help="Represent empty fields with this value, rather than leaving them blank" > | |
34 </param> | |
35 </inputs> | |
36 <outputs> | |
37 <data format="tabular" name="output" /> | |
38 </outputs> | |
39 <tests> | |
40 <test> | |
41 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> | |
42 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> | |
43 <output name="output"> | |
44 <assert_contents> | |
45 <has_text text="INTRAGENIC" /> | |
46 <not_has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> | |
47 </assert_contents> | |
48 </output> | |
49 </test> | |
50 | |
51 <test> | |
52 <param name="input" ftype="vcf" value="test_rmInfo.vcf"/> | |
53 <param name="extract" value="CHROM POS REF ALT EFF[*].EFFECT"/> | |
54 <param name="separator" value=","/> | |
55 <output name="output"> | |
56 <assert_contents> | |
57 <has_text text="DOWNSTREAM,INTRAGENIC,INTRON,UTR_3_PRIME" /> | |
58 </assert_contents> | |
59 </output> | |
60 </test> | |
61 | |
62 </tests> | |
63 <help><![CDATA[ | |
64 | |
65 **SnpSift Extract Fields** | |
66 | |
67 Extract fields from a VCF file to a TXT, tab separated format, that you can easily load in R, XLS, etc. | |
68 | |
69 http://snpeff.sourceforge.net/SnpSift.html#Extract | |
70 | |
71 You can also use sub-fields and genotype fields / sub-fields such as: | |
72 | |
73 :: | |
74 | |
75 Standard VCF fields: | |
76 CHROM | |
77 POS | |
78 ID | |
79 REF | |
80 ALT | |
81 FILTER | |
82 INFO fields: | |
83 AF | |
84 AC | |
85 DP | |
86 MQ | |
87 etc. (any info field available) | |
88 SnpEff 'ANN' fields: | |
89 "ANN[*].ALLELE" (alias GENOTYPE) | |
90 "ANN[*].EFFECT" (alias ANNOTATION): Effect in Sequence ontology terms (e.g. 'missense_variant', 'synonymous_variant', 'stop_gained', etc.) | |
91 "ANN[*].IMPACT" { HIGH, MODERATE, LOW, MODIFIER } | |
92 "ANN[*].GENE" Gene name (e.g. 'PSD3') | |
93 "ANN[*].GENEID" Gene ID | |
94 "ANN[*].FEATURE" | |
95 "ANN[*].FEATUREID" (alias TRID: Transcript ID) | |
96 "ANN[*].BIOTYPE" Biotype, as described by the annotations (e.g. 'protein_coding') | |
97 "ANN[*].RANK" Exon or Intron rank (i.e. exon number in a transcript) | |
98 "ANN[*].HGVS_C" (alias HGVS_DNA, CODON): Variant in HGVS (DNA) notation | |
99 "ANN[*].HGVS_P" (alias HGVS, HGVS_PROT, AA): Variant in HGVS (protein) notation | |
100 "ANN[*].CDNA_POS" (alias POS_CDNA) | |
101 "ANN[*].CDNA_LEN" (alias LEN_CDNA) | |
102 "ANN[*].CDS_POS" (alias POS_CDS) | |
103 "ANN[*].CDS_LEN" (alias LEN_CDS) | |
104 "ANN[*].AA_POS" (alias POS_AA) | |
105 "ANN[*].AA_LEN" (alias LEN_AA) | |
106 "ANN[*].DISTANCE" | |
107 "ANN[*].ERRORS" (alias WARNING, INFOS) | |
108 SnpEff 'EFF' fields (this is for older SnpEff/SnpSift versions; newer versions use the 'ANN' field): | |
109 "EFF[*].EFFECT" | |
110 "EFF[*].IMPACT" | |
111 "EFF[*].FUNCLASS" | |
112 "EFF[*].CODON" | |
113 "EFF[*].AA" | |
114 "EFF[*].AA_LEN" | |
115 "EFF[*].GENE" | |
116 "EFF[*].BIOTYPE" | |
117 "EFF[*].CODING" | |
118 "EFF[*].TRID" | |
119 "EFF[*].RANK" | |
120 SnpEff 'LOF' fields: | |
121 "LOF[*].GENE" | |
122 "LOF[*].GENEID" | |
123 "LOF[*].NUMTR" | |
124 "LOF[*].PERC" | |
125 SnpEff 'NMD' fields: | |
126 "NMD[*].GENE" | |
127 "NMD[*].GENEID" | |
128 "NMD[*].NUMTR" | |
129 "NMD[*].PERC" | |
130 | |
131 | |
132 Some examples: | |
133 | |
134 - *Extracting chromosome, position, ID and allele frequency from a VCF file:* | |
135 | |
136 **CHROM POS ID AF** | |
137 | |
138 The result will look something like: | |
139 | |
140 :: | |
141 | |
142 #CHROM POS ID AF | |
143 1 69134 0.086 | |
144 1 69496 rs150690004 0.001 | |
145 | |
146 | |
147 - *Extracting genotype fields:* | |
148 | |
149 **CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT** | |
150 | |
151 This means to extract: | |
152 | |
153 - CHROM POS ID: regular fields (as in the previous example) | |
154 - THETA : This one is from INFO | |
155 - GEN[0].GL[1] : Second likelihood from first genotype | |
156 - GEN[1].GL : The whole GL field (all entries without separating them) | |
157 - GEN[3].GL[*] : All likelihoods from genotype 3 (this time they will be tab separated, as opposed to the previous one). | |
158 - GEN[*].GT : Genotype subfields (GT) from ALL samples (tab separated). | |
159 | |
160 The result will look something like: | |
161 | |
162 :: | |
163 | |
164 #CHROM POS ID THETA GEN[0].GL[1] GEN[1].GL GEN[3].GL[*] GEN[*].GT | |
165 1 10583 rs58108140 0.0046 -0.47 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|0 0|0 0|1 0|0 0|1 0|0 0|0 0|1 | |
166 1 10611 rs189107123 0.0077 -0.48 -0.24,-0.44,-1.16 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 0|0 0|0 0|0 0|0 | |
167 1 13302 rs180734498 0.0048 -0.58 -2.45,-0.00,-5.00 -0.48 -0.48 -0.48 0|0 0|1 0|0 0|0 0|0 1|0 0|0 0|1 0|0 | |
168 | |
169 - *Extracting fields with multiple values:* | |
170 (notice that there are multiple effect columns per line because there are multiple effects per variant) | |
171 | |
172 **CHROM POS REF ALT ANN[*].EFFECT** | |
173 | |
174 The result will look something like: | |
175 | |
176 :: | |
177 | |
178 #CHROM POS REF ALT ANN[*].EFFECT | |
179 22 17071756 T C 3_prime_UTR_variant downstream_gene_variant | |
180 22 17072035 C T missense_variant downstream_gene_variant | |
181 22 17072258 C A missense_variant downstream_gene_variant | |
182 | |
183 - *Extracting fields with multiple values using a comma as a multiple field separator:* | |
184 | |
185 **CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P** | |
186 | |
187 The result will look something like: | |
188 | |
189 :: | |
190 | |
191 #CHROM POS REF ALT ANN[*].EFFECT ANN[*].HGVS_P | |
192 22 17071756 T C 3_prime_UTR_variant,downstream_gene_variant .,. | |
193 22 17072035 C T missense_variant,downstream_gene_variant p.Gly469Glu,. | |
194 22 17072258 C A missense_variant,downstream_gene_variant p.Gly395Cys,. | |
195 | |
196 | |
197 - *Extracting fields with multiple values, one effect per line:* | |
198 | |
199 **CHROM POS REF ALT ANN[*].EFFECT** | |
200 | |
201 The result will look something like: | |
202 | |
203 :: | |
204 | |
205 #CHROM POS REF ALT ANN[*].EFFECT | |
206 22 17071756 T C 3_prime_UTR_variant | |
207 22 17071756 T C downstream_gene_variant | |
208 22 17072035 C T missense_variant | |
209 22 17072035 C T downstream_gene_variant | |
210 22 17072258 C A missense_variant | |
211 22 17072258 C A downstream_gene_variant | |
212 | |
213 | |
214 @EXTERNAL_DOCUMENTATION@ | |
215 http://snpeff.sourceforge.net/SnpSift.html#Extract | |
216 | |
217 ]]> | |
218 </help> | |
219 <expand macro="citations" /> | |
220 </tool> |