comparison snpSift_dbnsfp.xml @ 0:dc480609d9c1 draft

Uploaded
author iuc
date Thu, 22 Jan 2015 08:53:21 -0500
parents
children 13191d4914f7
comparison
equal deleted inserted replaced
-1:000000000000 0:dc480609d9c1
1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="4.0.0">
2 <description>Add Annotations from dbNSFP</description>
3 <macros> <!-- shared SnpSift definitions; presumably supplies the macros expanded in this tool and the @...@ tokens used in the help text (confirm in snpSift_macros.xml) -->
4 <import>snpSift_macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command>
8 java -Xmx6G -jar \$SNPEFF_JAR_PATH/SnpSift.jar dbnsfp -v
9 ## Database source: the value chosen from the data table, or the file named by the history dataset's bgzip metadata inside its extra files directory.
10 #if $db.dbsrc == 'cached' :
11 -db "$db.dbnsfp"
12 #else :
13 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}"
14 #end if
15 ## Both branches of the db conditional define an annotations select, so the optional -f list is emitted once here.
16 #if $db.annotations and $db.annotations.__str__ != '':
17 -f "$db.annotations"
18 #end if
19 ## Paths are quoted for the shell. stderr is captured in tmp.err and, when java succeeds, printed again without the lines containing 'file'.
20 "$input" > "$output"
21 2> tmp.err &amp;&amp; grep -v file tmp.err
22 </command>
23 <inputs>
24 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
25 <conditional name="db"> <!-- dbsrc picks where the dbNSFP database comes from; each branch exposes its own "annotations" select -->
26 <param name="dbsrc" type="select" label="dbNSFP ">
27 <option value="cached">Locally installed dbNSFP database </option>
28 <option value="history">dbNSFP database from your history</option>
29 </param>
30 <when value="cached">
31 <param name="dbnsfp" type="select" label="Genome"> <!-- choices come from the snpsift_dbnsfp data table: column 1 is shown, column 2 is the value handed to -db -->
32 <options from_data_table="snpsift_dbnsfp">
33 <column name="name" index="1"/>
34 <column name="value" index="2"/>
35 </options>
36 </param>
37 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with"> <!-- choices are column 3 of the data table rows whose column 2 equals the selected dbnsfp value, split on "," -->
38 <options from_data_table="snpsift_dbnsfp">
39 <column name="name" index="3"/>
40 <column name="value" index="3"/>
41 <filter type="param_value" ref="dbnsfp" column="2" />
42 <filter type="multiple_splitter" column="3" separator=","/>
43 </options>
44 </param>
45 </when>
46 <when value="history">
47 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/>
48 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with"> <!-- choices are read from the "annotation" metadata of the selected dbnsfpdb dataset -->
49 <options>
50 <filter type="data_meta" ref="dbnsfpdb" key="annotation" />
51 </options>
52 </param>
53 </when>
54 </conditional>
55 </inputs>
56 <expand macro="stdio" /> <!-- expands the "stdio" macro; presumably defined in the imported snpSift_macros.xml (confirm) -->
57 <outputs>
58 <data format="vcf" name="output" /> <!-- VCF dataset that receives the stdout of the SnpSift command -->
59 </outputs>
60 <tests>
61 <test> <!-- history-database run: annotate the sample VCF with five selected columns and expect SIFT score 0.15 in the output -->
62 <param name="input" ftype="vcf" value="test_annotate_in.vcf.vcf"/>
63 <param name="dbsrc" value="history"/>
64 <param name="dbnsfpdb" value="test_dbnsfpdb.tabular" ftype="dbnsfp.tabular" />
65 <param name="annotations" value="aaref,aaalt,genename,aapos,SIFT_score"/> <!-- declared as a param, like its siblings, so the test actually sets the annotations select -->
66 <output name="output">
67 <assert_contents>
68 <has_text text="dbNSFP_SIFT_score=0.15" />
69 </assert_contents>
70 </output>
71 </test>
72 </tests>
73 <help>
74
75 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
76
77
78 1000Gp1_AC
79 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data
80 1000Gp1_AF
81 Alternative allele frequency in the whole 1000Gp1 data
82 1000Gp1_AFR_AC
83 Alternative allele counts in the 1000Gp1 African descendent samples
84 1000Gp1_AFR_AF
85 Alternative allele frequency in the 1000Gp1 African descendent samples
86 1000Gp1_AMR_AC
87 Alternative allele counts in the 1000Gp1 American descendent samples
88 1000Gp1_AMR_AF
89 Alternative allele frequency in the 1000Gp1 American descendent samples
90 1000Gp1_ASN_AC
91 Alternative allele counts in the 1000Gp1 Asian descendent samples
92 1000Gp1_ASN_AF
93 Alternative allele frequency in the 1000Gp1 Asian descendent samples
94 1000Gp1_EUR_AC
95 Alternative allele counts in the 1000Gp1 European descendent samples
96 1000Gp1_EUR_AF
97 Alternative allele frequency in the 1000Gp1 European descendent samples
98 aaalt
99 Alternative amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
100 aapos
101 Amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron)
102 aapos_SIFT
103 ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";"
104 aapos_FATHMM
105 ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";"
106 aaref
107 Reference amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
108 alt
109 Alternative nucleotide allele (as on the + strand)
110 Ancestral_allele
111 Ancestral allele (based on 1000 genomes reference data)
112 cds_strand
113 Coding sequence (CDS) strand (+ or -)
114 chr
115 Chromosome number
116 codonpos
117 Position on the codon (1, 2 or 3)
118 Ensembl_geneid
119 Ensembl gene ID
120 Ensembl_transcriptid
121 Ensembl transcript IDs (separated by ";")
122 ESP6500_AA_AF
123 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
124 ESP6500_EA_AF
125 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
126 FATHMM_pred
127 If a FATHMM_score is &lt;=-1.5 (or rankscore &lt;=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
128 FATHMM_rankscore
129 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
130 FATHMM_score
131 FATHMM default score (FATHMMori)
132 fold-degenerate
133 Degenerate type (0, 2 or 3)
134 genename
135 Gene name; if the non-synonymous SNP can be assigned to multiple genes, gene names are separated by ";"
136 GERP++_NR
137 GERP++ neutral rate
138 GERP++_RS
139 GERP++ RS score, the larger the score, the more conserved the site
140 GERP++_RS_rankscore
141 GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP
142 hg18_pos(1-coor)
143 Physical position on the chromosome as to hg18 (1-based coordinate)
144 Interpro_domain
145 Domain or conserved site in which the variant is located
146 LR_pred
147 Prediction of our LR based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0.5. The rankscore cutoff between "D" and "T" is 0.82268
148 LR_rankscore
149 LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1
150 LR_score
151 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1
152 LRT_Omega
153 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
154 LRT_converted_rankscore
155 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega&lt;1, or LRTnew=LRTori*0.5 if Omega&gt;=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
156 LRT_pred
157 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
158 LRT_score
159 The original LRT two-sided p-value (LRTori), ranges from 0 to 1
160 MutationAssessor_pred
161 MutationAssessor's functional impact of a variant
162 MutationAssessor_rankscore
163 MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1
164 MutationAssessor_score
165 MutationAssessor functional impact combined score (MAori)
166 MutationTaster_converted_rankscore
167 The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722
168 MutationTaster_pred
169 MutationTaster prediction
170 MutationTaster_score
171 MutationTaster p-value (MTori), ranges from 0 to 1
172 phastCons46way_placental
173 phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
174 phastCons46way_placental_rankscore
175 phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP
176 phastCons46way_primate
177 phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
178 phastCons46way_primate_rankscore
179 phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP
180 phastCons100way_vertebrate
181 phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
182 phastCons100way_vertebrate_rankscore
183 phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP
184 phyloP46way_placental
185 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
186 phyloP46way_placental_rankscore
187 phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP
188 phyloP46way_primate
189 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
190 phyloP46way_primate_rankscore
191 phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP
192 phyloP100way_vertebrate
193 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
194 phyloP100way_vertebrate_rankscore
195 phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP
196 Polyphen2_HDIV_pred
197 Polyphen2 prediction based on HumDiv
198 Polyphen2_HDIV_rankscore
199 Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917
200 Polyphen2_HDIV_score
201 Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
202 Polyphen2_HVAR_pred
203 Polyphen2 prediction based on HumVar
204 Polyphen2_HVAR_rankscore
205 Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711
206 Polyphen2_HVAR_score
207 Polyphen2 score based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
208 pos(1-coor)
209 Physical position on the chromosome as to hg19 (1-based coordinate)
210 RadialSVM_pred
211 Prediction of our SVM based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357
212 RadialSVM_rankscore
213 RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1
214 RadialSVM_score
215 Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from -2 to 3 in dbNSFP
216 ref
217 Reference nucleotide allele (as on the + strand)
218 refcodon
219 Reference codon
220 Reliability_index
221 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions
222 SIFT_converted_rankscore
223 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
224 SIFT_pred
225 If SIFTori is smaller than 0.05 (rankscore&gt;0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
226 SIFT_score
227 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
228 SiPhy_29way_logOdds
229 SiPhy score based on 29 mammalian genomes. The larger the score, the more conserved the site
230 SiPhy_29way_pi
231 The estimated stationary distribution of A, C, G and T at the site, using the SiPhy algorithm based on 29 mammalian genomes
232 SLR_test_statistic
233 SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence
234 Uniprot_aapos
235 Amino acid position as to Uniprot. Multiple entries separated by ";"
236 Uniprot_acc
237 Uniprot accession number. Multiple entries separated by ";"
238 Uniprot_id
239 Uniprot ID number. Multiple entries separated by ";"
240 UniSNP_ids
241 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
242
243
244 The website for the dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP; it only provides annotations for the human hg18 and hg19 genome builds.
245
246 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it has::
247
248 - The first line of the file must be column headers that name the annotations.
249 - The first 4 columns are required and must be::
250 1. chromosome
251 2. position in chromosome
252 3. reference base
253 4. alternate base
254
255 For example:
256
257 ::
258
259 #chr pos(1-coor) ref alt aaref aaalt genename SIFT_score
260 1 69134 A C E A OR4F5 0.03
261 1 69134 A G E G OR4F5 0.09
262 1 69134 A T E V OR4F5 0.03
263 4 100239319 T A H L ADH1B 0
264 4 100239319 T C H R ADH1B 0.15
265 4 100239319 T G H P ADH1B 0
266
267
268 The uploaded tabular file should be set to datatype: "dbnsfp.tabular".
269 Using "Convert Format", the "dbnsfp.tabular" dataset can be converted to the correct format for SnpSift dbnsfp.
270
271 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation.
272
273
274 @EXTERNAL_DOCUMENTATION@
275 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
276
277 @CITATION_SECTION@
278
279
280 </help>
281 </tool>