comparison snpSift_dbnsfp.xml @ 4:4e21e4f2bc48 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tool_collections/snpsift/snpsift_dbnsfp commit fbc18d9128669e461e76ed13307ee88dd774afa5
author iuc
date Mon, 12 Jun 2017 10:25:44 -0400
parents c838e7136a40
children
comparison
equal deleted inserted replaced
3:563d1bdb7b80 4:4e21e4f2bc48
1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="@WRAPPER_VERSION@.1"> 1 <tool id="snpSift_dbnsfp" name="SnpSift dbNSFP" version="@WRAPPER_VERSION@.0">
2 <description>Add Annotations from dbNSFP or similar annotation DBs</description> 2 <description>Add annotations from dbNSFP or similar annotation DBs</description>
3 <macros> 3 <macros>
4 <import>snpSift_macros.xml</import> 4 <import>snpSift_macros.xml</import>
5 </macros> 5 </macros>
6 <expand macro="requirements" /> 6 <expand macro="requirements" />
7 <expand macro="stdio" /> 7 <expand macro="stdio" />
8 <expand macro="version_command" /> 8 <expand macro="version_command" />
9 <command><![CDATA[ 9 <command><![CDATA[
10 @CONDA_SNPSIFT_JAR_PATH@ && 10 SnpSift -Xmx6G dbnsfp -v
11 java -Xmx6G -jar "\$SNPSIFT_JAR_PATH/SnpSift.jar" dbnsfp -v 11 #if $db.dbsrc == 'cached':
12 #if $db.dbsrc == 'cached': 12 -db '$db.dbnsfp'
13 -db "$db.dbnsfp" 13 #if $db.annotations and str($db.annotations) != '':
14 #if $db.annotations and str($db.annotations) != '': 14 -f '$db.annotations'
15 -f "$db.annotations" 15 #end if
16 #end if 16 #else:
17 #else: 17 -db '${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}'
18 -db "${db.dbnsfpdb.extra_files_path}/${db.dbnsfpdb.metadata.bgzip}" 18 #if $db.annotations and str($db.annotations) != '':
19 #if $db.annotations and str($db.annotations) != '': 19 -f '$db.annotations'
20 -f "$db.annotations" 20 #end if
21 #end if 21 #end if
22 #end if 22 '$input' > '$output'
23 "$input" > "$output" 23 2> tmp.err && grep -v file tmp.err
24 2> tmp.err && grep -v file tmp.err 24 ]]></command>
25 ]]>
26 </command>
27 <inputs> 25 <inputs>
28 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/> 26 <param name="input" type="data" format="vcf" label="Variant input file in VCF format"/>
29 <conditional name="db"> 27 <conditional name="db">
30 <param name="dbsrc" type="select" label="dbNSFP "> 28 <param name="dbsrc" type="select" label="dbNSFP ">
31 <option value="cached">Locally installed dbNSFP database </option> 29 <option value="cached">Locally installed dbNSFP database </option>
48 </param> 46 </param>
49 </when> 47 </when>
50 <when value="history"> 48 <when value="history">
51 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/> 49 <param name="dbnsfpdb" type="data" format="snpsiftdbnsfp" label="DbNSFP"/>
52 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with"> 50 <param name="annotations" type="select" multiple="true" display="checkboxes" label="Annotate with">
53 <options> 51 <options>
54 <filter type="data_meta" ref="dbnsfpdb" key="annotation" /> 52 <filter type="data_meta" ref="dbnsfpdb" key="annotation" />
55 </options> 53 </options>
56 </param> 54 </param>
57 </when> 55 </when>
58 </conditional> 56 </conditional>
59 </inputs> 57 </inputs>
60 <outputs> 58 <outputs>
61 <data format="vcf" name="output" /> 59 <data name="output" format="vcf" />
62 </outputs> 60 </outputs>
63 <tests> 61 <tests>
64 <!-- This cannot be tested at the moment because test_dbnsfpdb.tabular 62 <!-- This cannot be tested at the moment because test_dbnsfpdb.tabular
65 is converted from dbnsfp.tabular to snpsiftdbnsfp format on-the-fly 63 is converted from dbnsfp.tabular to snpsiftdbnsfp format on-the-fly
66 when this tool is run and annotation metadata is not available 64 when this tool is run and annotation metadata is not available
76 </assert_contents> 74 </assert_contents>
77 </output> 75 </output>
78 </test> --> 76 </test> -->
79 </tests> 77 </tests>
80 <help><![CDATA[ 78 <help><![CDATA[
81
82 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.). 79 The dbNSFP is an integrated database of functional predictions from multiple algorithms (SIFT, Polyphen2, LRT and MutationTaster, PhyloP and GERP++, etc.).
83 It contains variant annotations such as: 80 It contains variant annotations such as:
84 81
85 82 1000Gp1_AC
86 1000Gp1_AC
87 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data 83 Alternative allele counts in the whole 1000 genomes phase 1 (1000Gp1) data
88 1000Gp1_AF 84 1000Gp1_AF
89 Alternative allele frequency in the whole 1000Gp1 data 85 Alternative allele frequency in the whole 1000Gp1 data
90 1000Gp1_AFR_AC 86 1000Gp1_AFR_AC
91 Alternative allele counts in the 1000Gp1 African descendent samples 87 Alternative allele counts in the 1000Gp1 African descendent samples
92 1000Gp1_AFR_AF 88 1000Gp1_AFR_AF
93 Alternative allele frequency in the 1000Gp1 African descendent samples 89 Alternative allele frequency in the 1000Gp1 African descendent samples
94 1000Gp1_AMR_AC 90 1000Gp1_AMR_AC
95 Alternative allele counts in the 1000Gp1 American descendent samples 91 Alternative allele counts in the 1000Gp1 American descendent samples
96 1000Gp1_AMR_AF 92 1000Gp1_AMR_AF
97 Alternative allele frequency in the 1000Gp1 American descendent samples 93 Alternative allele frequency in the 1000Gp1 American descendent samples
98 1000Gp1_ASN_AC 94 1000Gp1_ASN_AC
99 Alternative allele counts in the 1000Gp1 Asian descendent samples 95 Alternative allele counts in the 1000Gp1 Asian descendent samples
100 1000Gp1_ASN_AF 96 1000Gp1_ASN_AF
101 Alternative allele frequency in the 1000Gp1 Asian descendent samples 97 Alternative allele frequency in the 1000Gp1 Asian descendent samples
102 1000Gp1_EUR_AC 98 1000Gp1_EUR_AC
103 Alternative allele counts in the 1000Gp1 European descendent samples 99 Alternative allele counts in the 1000Gp1 European descendent samples
104 1000Gp1_EUR_AF 100 1000Gp1_EUR_AF
105 Alternative allele frequency in the 1000Gp1 European descendent samples 101 Alternative allele frequency in the 1000Gp1 European descendent samples
106 aaalt 102 aaalt
107 Alternative amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron) 103 Alternative amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
108 aapos 104 aapos
109 Amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron) 105 Amino acid position as to the protein. "-1" if the variant is a splicing site SNP (2bp on each end of an intron)
110 aapos_SIFT 106 aapos_SIFT
111 ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";" 107 ENSP id and amino acid positions corresponding to SIFT scores. Multiple entries separated by ";"
112 aapos_FATHMM 108 aapos_FATHMM
113 ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";" 109 ENSP id and amino acid positions corresponding to FATHMM scores. Multiple entries separated by ";"
114 aaref 110 aaref
115 Reference amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron) 111 Reference amino acid. "." if the variant is a splicing site SNP (2bp on each end of an intron)
116 alt 112 alt
117 Alternative nucleotide allele (as on the + strand) 113 Alternative nucleotide allele (as on the + strand)
118 Ancestral_allele 114 Ancestral_allele
119 Ancestral allele (based on 1000 genomes reference data) 115 Ancestral allele (based on 1000 genomes reference data)
120 cds_strand 116 cds_strand
121 Coding sequence (CDS) strand (+ or -) 117 Coding sequence (CDS) strand (+ or -)
122 chr 118 chr
123 Chromosome number 119 Chromosome number
124 codonpos 120 codonpos
125 Position on the codon (1, 2 or 3) 121 Position on the codon (1, 2 or 3)
126 Ensembl_geneid 122 Ensembl_geneid
127 Ensembl gene ID 123 Ensembl gene ID
128 Ensembl_transcriptid 124 Ensembl_transcriptid
129 Ensembl transcript IDs (separated by ";") 125 Ensembl transcript IDs (separated by ";")
130 ESP6500_AA_AF 126 ESP6500_AA_AF
131 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 127 Alternative allele frequency in the African American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
132 ESP6500_EA_AF 128 ESP6500_EA_AF
133 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set) 129 Alternative allele frequency in the European American samples of the NHLBI GO Exome Sequencing Project (ESP6500 data set)
134 FATHMM_pred 130 FATHMM_pred
135 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";" 131 If a FATHMM_score is <=-1.5 (or rankscore <=0.81415) the corresponding non-synonymous SNP is predicted as "D(AMAGING)"; otherwise it is predicted as "T(OLERATED)". Multiple predictions separated by ";"
136 FATHMM_rankscore 132 FATHMM_rankscore
137 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1 133 FATHMMori scores were ranked among all FATHMMori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of FATHMMori scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0 to 1
138 FATHMM_score 134 FATHMM_score
139 FATHMM default score (FATHMMori) 135 FATHMM default score (FATHMMori)
140 fold-degenerate 136 fold-degenerate
141 Degenerate type (0, 2 or 3) 137 Degenerate type (0, 2 or 3)
142 genename 138 genename
143 Gene name; if the non-synonymous SNP can be assigned to multiple genes, gene names are separated by ";" 139 Gene name; if the non-synonymous SNP can be assigned to multiple genes, gene names are separated by ";"
144 GERP++_NR 140 GERP++_NR
145 GERP++ neutral rate 141 GERP++ neutral rate
146 GERP++_RS 142 GERP++_RS
147 GERP++ RS score, the larger the score, the more conserved the site 143 GERP++ RS score, the larger the score, the more conserved the site
148 GERP++_RS_rankscore 144 GERP++_RS_rankscore
149 GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP 145 GERP++ RS scores were ranked among all GERP++ RS scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of GERP++ RS scores in dbNSFP
150 hg18_pos(1-coor) 146 hg18_pos(1-coor)
151 Physical position on the chromosome as to hg18 (1-based coordinate) 147 Physical position on the chromosome as to hg18 (1-based coordinate)
152 Interpro_domain 148 Interpro_domain
153 Domain or conserved site on which the variant locates 149 Domain or conserved site on which the variant locates
154 LR_pred 150 LR_pred
155 Prediction of our LR based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0.5. The rankscore cutoff between "D" and "T" is 0.82268 151 Prediction of our LR based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0.5. The rankscore cutoff between "D" and "T" is 0.82268
156 LR_rankscore 152 LR_rankscore
157 LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1 153 LR scores were ranked among all LR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of LR scores in dbNSFP. The scores range from 0 to 1
158 LR_score 154 LR_score
159 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1 155 Our logistic regression (LR) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from 0 to 1
160 LRT_Omega 156 LRT_Omega
161 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT) 157 Estimated nonsynonymous-to-synonymous-rate ratio (Omega, reported by LRT)
162 LRT_converted_rankscore 158 LRT_converted_rankscore
163 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682 159 LRTori scores were first converted as LRTnew=1-LRTori*0.5 if Omega<1, or LRTnew=LRTori*0.5 if Omega>=1. Then LRTnew scores were ranked among all LRTnew scores in dbNSFP. The rankscore is the ratio of the rank over the total number of the scores in dbNSFP. The scores range from 0.00166 to 0.85682
164 LRT_pred 160 LRT_pred
165 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score 161 LRT prediction, D(eleterious), N(eutral) or U(nknown), which is not solely determined by the score
166 LRT_score 162 LRT_score
167 The original LRT two-sided p-value (LRTori), ranges from 0 to 1 163 The original LRT two-sided p-value (LRTori), ranges from 0 to 1
168 MutationAssessor_pred 164 MutationAssessor_pred
169 MutationAssessor's functional impact of a variant 165 MutationAssessor's functional impact of a variant
170 MutationAssessor_rankscore 166 MutationAssessor_rankscore
171 MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1 167 MAori scores were ranked among all MAori scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MAori scores in dbNSFP. The scores range from 0 to 1
172 MutationAssessor_score 168 MutationAssessor_score
173 MutationAssessor functional impact combined score (MAori) 169 MutationAssessor functional impact combined score (MAori)
174 MutationTaster_converted_rankscore 170 MutationTaster_converted_rankscore
175 The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722 171 The MTori scores were first converted: if the prediction is "A" or "D" MTnew=MTori; if the prediction is "N" or "P", MTnew=1-MTori. Then MTnew scores were ranked among all MTnew scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of MTnew scores in dbNSFP. The scores range from 0.0931 to 0.80722
176 MutationTaster_pred 172 MutationTaster_pred
177 MutationTaster prediction 173 MutationTaster prediction
178 MutationTaster_score 174 MutationTaster_score
179 MutationTaster p-value (MTori), ranges from 0 to 1 175 MutationTaster p-value (MTori), ranges from 0 to 1
180 phastCons46way_placental 176 phastCons46way_placental
181 phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site 177 phastCons conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
182 phastCons46way_placental_rankscore 178 phastCons46way_placental_rankscore
183 phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP 179 phastCons46way_placental scores were ranked among all phastCons46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_placental scores in dbNSFP
184 phastCons46way_primate 180 phastCons46way_primate
185 phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site 181 phastCons conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
186 phastCons46way_primate_rankscore 182 phastCons46way_primate_rankscore
187 phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP 183 phastCons46way_primate scores were ranked among all phastCons46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons46way_primate scores in dbNSFP
188 phastCons100way_vertebrate 184 phastCons100way_vertebrate
189 phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site 185 phastCons conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
190 phastCons100way_vertebrate_rankscore 186 phastCons100way_vertebrate_rankscore
191 phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP 187 phastCons100way_vertebrate scores were ranked among all phastCons100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phastCons100way_vertebrate scores in dbNSFP
192 phyloP46way_placental 188 phyloP46way_placental
193 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site 189 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 33 placental mammal genomes (including human). The larger the score, the more conserved the site
194 phyloP46way_placental_rankscore 190 phyloP46way_placental_rankscore
195 phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP 191 phyloP46way_placental scores were ranked among all phyloP46way_placental scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_placental scores in dbNSFP
196 phyloP46way_primate 192 phyloP46way_primate
197 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site 193 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 10 primate genomes (including human). The larger the score, the more conserved the site
198 phyloP46way_primate_rankscore 194 phyloP46way_primate_rankscore
199 phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP 195 phyloP46way_primate scores were ranked among all phyloP46way_primate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP46way_primate scores in dbNSFP
200 phyloP100way_vertebrate 196 phyloP100way_vertebrate
201 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site 197 phyloP (phylogenetic p-values) conservation score based on the multiple alignments of 100 vertebrate genomes (including human). The larger the score, the more conserved the site
202 phyloP100way_vertebrate_rankscore 198 phyloP100way_vertebrate_rankscore
203 phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP 199 phyloP100way_vertebrate scores were ranked among all phyloP100way_vertebrate scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of phyloP100way_vertebrate scores in dbNSFP
204 Polyphen2_HDIV_pred 200 Polyphen2_HDIV_pred
205 Polyphen2 prediction based on HumDiv 201 Polyphen2 prediction based on HumDiv
206 Polyphen2_HDIV_rankscore 202 Polyphen2_HDIV_rankscore
207 Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917 203 Polyphen2 HDIV scores were first ranked among all HDIV scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.02656 to 0.89917
208 Polyphen2_HDIV_score 204 Polyphen2_HDIV_score
209 Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1. Multiple entries separated by ";" 205 Polyphen2 score based on HumDiv, i.e. hdiv_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
210 Polyphen2_HVAR_pred 206 Polyphen2_HVAR_pred
211 Polyphen2 prediction based on HumVar 207 Polyphen2 prediction based on HumVar
212 Polyphen2_HVAR_rankscore 208 Polyphen2_HVAR_rankscore
213 Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711 209 Polyphen2 HVAR scores were first ranked among all HVAR scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of the scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The scores range from 0.01281 to 0.9711
214 Polyphen2_HVAR_score 210 Polyphen2_HVAR_score
215 Polyphen2 score based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1. Multiple entries separated by ";" 211 Polyphen2 score based on HumVar, i.e. hvar_prob. The score ranges from 0 to 1. Multiple entries separated by ";"
216 pos(1-coor) 212 pos(1-coor)
217 Physical position on the chromosome as to hg19 (1-based coordinate) 213 Physical position on the chromosome as to hg19 (1-based coordinate)
218 RadialSVM_pred 214 RadialSVM_pred
219 Prediction of our SVM based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357 215 Prediction of our SVM based ensemble prediction score, "T(olerated)" or "D(amaging)". The score cutoff between "D" and "T" is 0. The rankscore cutoff between "D" and "T" is 0.83357
220 RadialSVM_rankscore 216 RadialSVM_rankscore
221 RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1 217 RadialSVM scores were ranked among all RadialSVM scores in dbNSFP. The rankscore is the ratio of the rank of the score over the total number of RadialSVM scores in dbNSFP. The scores range from 0 to 1
222 RadialSVM_score 218 RadialSVM_score
223 Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from -2 to 3 in dbNSFP 219 Our support vector machine (SVM) based ensemble prediction score, which incorporated 10 scores (SIFT, PolyPhen-2 HDIV, PolyPhen-2 HVAR, GERP++, MutationTaster, Mutation Assessor, FATHMM, LRT, SiPhy, PhyloP) and the maximum frequency observed in the 1000 genomes populations. Larger value means the SNV is more likely to be damaging. Scores range from -2 to 3 in dbNSFP
224 ref 220 ref
225 Reference nucleotide allele (as on the + strand) 221 Reference nucleotide allele (as on the + strand)
226 refcodon 222 refcodon
227 Reference codon 223 Reference codon
228 Reliability_index 224 Reliability_index
229 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions 225 Number of observed component scores (except the maximum frequency in the 1000 genomes populations) for RadialSVM and LR. Ranges from 1 to 10. As RadialSVM and LR scores are calculated based on imputed data, the fewer missing component scores, the higher the reliability of the scores and predictions
230 SIFT_converted_rankscore 226 SIFT_converted_rankscore
231 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932 227 SIFTori scores were first converted to SIFTnew=1-SIFTori, then ranked among all SIFTnew scores in dbNSFP. The rankscore is the ratio of the rank of the SIFTnew score over the total number of SIFTnew scores in dbNSFP. If there are multiple scores, only the most damaging (largest) rankscore is presented. The rankscores range from 0.02654 to 0.87932
232 SIFT_pred 228 SIFT_pred
233 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";" 229 If SIFTori is smaller than 0.05 (rankscore>0.55) the corresponding non-synonymous SNP is predicted as "D(amaging)"; otherwise it is predicted as "T(olerated)". Multiple predictions separated by ";"
234 SIFT_score 230 SIFT_score
235 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";" 231 SIFT score (SIFTori). Scores range from 0 to 1. The smaller the score the more likely the SNP has damaging effect. Multiple scores separated by ";"
236 SiPhy_29way_logOdds 232 SiPhy_29way_logOdds
237 SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site 233 SiPhy score based on 29 mammals genomes. The larger the score, the more conserved the site
238 SiPhy_29way_pi 234 SiPhy_29way_pi
239 The estimated stationary distribution of A, C, G and T at the site, using SiPhy algorithm based on 29 mammals genomes 235 The estimated stationary distribution of A, C, G and T at the site, using SiPhy algorithm based on 29 mammals genomes
240 SLR_test_statistic 236 SLR_test_statistic
241 SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence 237 SLR test statistic for testing natural selection on codons. A negative value indicates negative selection, and a positive value indicates positive selection. Larger magnitude of the value suggests stronger evidence
242 Uniprot_aapos 238 Uniprot_aapos
243 Amino acid position as to Uniprot. Multiple entries separated by ";" 239 Amino acid position as to Uniprot. Multiple entries separated by ";"
244 Uniprot_acc 240 Uniprot_acc
245 Uniprot accession number. Multiple entries separated by ";" 241 Uniprot accession number. Multiple entries separated by ";"
246 Uniprot_id 242 Uniprot_id
247 Uniprot ID number. Multiple entries separated by ";" 243 Uniprot ID number. Multiple entries separated by ";"
248 UniSNP_ids 244 UniSNP_ids
249 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;... 245 rs numbers from UniSNP, which is a cleaned version of dbSNP build 129, in format: rs number1;rs number2;...
250 246
251 247 The dbNSFP database is available from https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human genome builds.
252 The website for dbNSFP database is https://sites.google.com/site/jpopgen/dbNSFP and there is only annotation for human genome builds. 248
253 249 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp and a couple of prebuilt dbNSFP databases are available at:
254 The procedure for preparing the dbNSFP data for use in SnpSift dbnsfp is in the SnpSift documentation:
255 *( It also provides links for dbNSFP databases prebuilt for SnpSift )*
256 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP 250 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
257 251
258 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it meets the following requirements:: 252 However, any dbNSFP-like tabular file can be used with SnpSift dbnsfp if it meets the following requirements:
259 253
260 - The first line of the file must be column headers that name the annotations. 254 - The first line of the file must be column headers that name the annotations.
261 - The first 4 columns are required and must be:: 255 - The first 4 columns are required and must be:
262 1. chromosome 256
263 2. position in chromosome 257 1. chr: chromosome
264 3. reference base 258 2. pos(1-coor): position in chromosome
265 4. alternate base 259 3. ref: reference base
266 260 4. alt: alternate base
267 For example: 261
268 262 For example::
269 :: 263
270 264 #chr pos(1-coor) ref alt aaref aaalt genename SIFT_score
271 #chr pos(1-coor) ref alt aaref aaalt genename SIFT_score 265 1 69134 A C E A OR4F5 0.03
272 1 69134 A C E A OR4F5 0.03 266 1 69134 A G E G OR4F5 0.09
273 1 69134 A G E G OR4F5 0.09 267 1 69134 A T E V OR4F5 0.03
274 1 69134 A T E V OR4F5 0.03 268 4 100239319 T A H L ADH1B 0
275 4 100239319 T A H L ADH1B 0 269 4 100239319 T C H R ADH1B 0.15
276 4 100239319 T C H R ADH1B 0.15 270 4 100239319 T G H P ADH1B 0
277 4 100239319 T G H P ADH1B 0 271
278 272 The Galaxy datatypes for dbNSFP can automatically convert the specially formatted tabular file for use by SnpSift dbNSFP:
279 273
280 The galaxy datatypes for dbNSFP can automatically convert the specially formatted tabular file for use by SnpSift dbNSFP: 274 1. Upload the tabular file, set the datatype as: **"dbnsfp.tabular"**
281 1. Upload the tabular file, set the datatype as: **"dbnsfp.tabular"** 275 2. Edit the history dataset attributes (pencil icon): Use "Convert Format" to convert the **"dbnsfp.tabular"** to the correct format for SnpSift dbnsfp: **"snpsiftdbnsfp"**.
282 2. Edit the history dataset attributes (pencil icon): Use "Convert Format" to convert the **"dbnsfp.tabular"** to the correct format for SnpSift dbnsfp: **"snpsiftdbnsfp"**.
283
284 276
285 @EXTERNAL_DOCUMENTATION@ 277 @EXTERNAL_DOCUMENTATION@
286 http://snpeff.sourceforge.net/SnpSift.html#dbNSFP 278 - http://snpeff.sourceforge.net/SnpSift.html#dbNSFP
287 ]]> 279 ]]></help>
288 </help>
289 <expand macro="citations"> 280 <expand macro="citations">
290 <citation type="doi">DOI: 10.1002/humu.21517</citation> 281 <citation type="doi">10.1002/humu.21517</citation>
291 <citation type="doi">DOI: 10.1002/humu.22376</citation> 282 <citation type="doi">10.1002/humu.22932</citation>
292 <citation type="doi">DOI: 10.1002/humu.22932</citation> 283 <citation type="doi">10.1093/hmg/ddu733</citation>
293 <citation type="doi">doi: 10.1093/hmg/ddu733</citation> 284 <citation type="doi">10.3389/fgene.2012.00035</citation>
294 <citation type="doi">doi: 10.1093/nar/gku1206</citation>
295 <citation type="doi">doi: 10.3389/fgene.2012.00035</citation>
296 </expand> 285 </expand>
297 </tool> 286 </tool>