0
|
1 <tool id="hgv_sift" name="SIFT" version="1.0.0">
|
|
2 <description>predictions of functional sites</description>
|
|
3
|
|
<!--
  Runs sift_variants_wrapper.sh through bash. Positional arguments, in order:
     1. input dataset
     2. output dataset
     3. dbkey (build) taken from the input dataset's metadata
     4. sift_db.loc located under GALAXY_DATA_INDEX_DIR
     5. chromosome column
     6. position column
     7. coordinate base: 1 (one-based) or 0 (zero-based)
     8. allele column
     9. strand column, or the literal + / - when a single strand is chosen
    10. comment column, or a single - when no comment column is included
    11. the selected additional output fields
-->
<command interpreter="bash"><![CDATA[
sift_variants_wrapper.sh "$input" "$output" "${input.metadata.dbkey}" "${GALAXY_DATA_INDEX_DIR}/sift_db.loc" "$chrom_col" "$pos_col" "$base" "$allele_col" "$strand_source.strand_col" "$comment_source.comment_col" "$output_opts"
]]></command>
|
|
7
|
|
<inputs>
  <!-- Tabular variant dataset. Both validators concern its build (dbkey): per
       the second validator's attributes, the dbkey must be listed in column 0
       of sift_db.loc, otherwise the message given there is reported.
       NOTE(review): unspecified_build presumably rejects datasets with no
       build assigned; confirm against the Galaxy validator documentation. -->
  <param name="input" type="data" format="tabular" label="Dataset">
    <validator type="unspecified_build"/>
    <validator type="dataset_metadata_in_file"
               filename="sift_db.loc"
               metadata_name="dbkey"
               metadata_column="0"
               message="Data is currently not available for the specified build."/>
  </param>

  <!-- Column selectors; each one refers to the dataset chosen above. -->
  <param name="chrom_col"
         type="data_column"
         data_ref="input"
         label="Column with chromosome"/>
  <param name="pos_col"
         type="data_column"
         data_ref="input"
         numerical="true"
         label="Column with position"/>

  <!-- Coordinate convention of the position column; one-based is preselected. -->
  <param name="base" type="select" label="Position coordinates are">
    <option value="1" selected="true">one-based</option>
    <option value="0">zero-based</option>
  </param>

  <param name="allele_col"
         type="data_column"
         data_ref="input"
         label="Column with allele"/>

  <!-- Strand: either a dataset column, or a fixed + / - supplied through a
       hidden strand_col, so strand_source.strand_col is defined in every case. -->
  <conditional name="strand_source">
    <param name="strand_choice" type="select" label="Strand info">
      <option value="data_column" selected="true">a column in the dataset</option>
      <option value="all_pos">all on sense/forward/+ strand</option>
      <option value="all_neg">all on antisense/reverse/- strand</option>
    </param>
    <when value="data_column">
      <param name="strand_col"
             type="data_column"
             data_ref="input"
             label="Column with strand"/>
    </when>
    <when value="all_pos">
      <param name="strand_col" type="hidden" value="+"/>
    </when>
    <when value="all_neg">
      <param name="strand_col" type="hidden" value="-"/>
    </when>
  </conditional>

  <!-- Optional comment column; when it is not included, a hidden comment_col
       holds a single - so comment_source.comment_col is defined in both cases. -->
  <conditional name="comment_source">
    <param name="comment_choice" type="select" label="Include comment column">
      <option value="no" selected="true">no</option>
      <option value="yes">yes</option>
    </param>
    <when value="no">
      <param name="comment_col" type="hidden" value="-"/>
    </when>
    <when value="yes">
      <param name="comment_col"
             type="data_column"
             data_ref="input"
             label="Column with comment"/>
    </when>
  </conditional>

  <!-- Optional extra output fields, shown as checkboxes; the option values are
       the single letters A to L. -->
  <param name="output_opts"
         type="select"
         multiple="true"
         display="checkboxes"
         label="Include the following additional fields in the output">
    <option value="A">Ensembl Gene ID</option>
    <option value="B">Gene Name</option>
    <option value="C">Gene Description</option>
    <option value="D">Ensembl Protein Family ID</option>
    <option value="E">Ensembl Protein Family Description</option>
    <option value="F">Ensembl Transcript Status (Known / Novel)</option>
    <option value="G">Protein Family Size</option>
    <option value="H">Ka/Ks (Human-mouse)</option>
    <option value="I">Ka/Ks (Human-macaque)</option>
    <option value="J">OMIM Disease</option>
    <option value="K">Allele Frequencies (All Hapmap Populations - weighted average)</option>
    <option value="L">Allele Frequencies (CEU Hapmap population)</option>
  </param>
</inputs>
|
|
63
|
|
<!-- The tool produces one tabular dataset, named "output". -->
<outputs>
  <data name="output" format="tabular"/>
</outputs>
|
|
67
|
|
<!-- Command-line utilities declared as binary requirements of this tool. -->
<requirements>
  <requirement type="binary">awk</requirement>
  <requirement type="binary">rm</requirement>
  <requirement type="binary">sed</requirement>
</requirements>
|
|
73
|
|
<tests>
  <!-- Single case on an hg18 dataset: one-based positions in column 3, strand
       read from column 4, alleles in column 5, and the Ensembl Gene ID field
       (option A) requested. comment_choice is not set in this test. -->
  <test>
    <param name="input"
           value="sift_variants.tab"
           ftype="tabular"
           dbkey="hg18"/>
    <param name="chrom_col" value="1"/>
    <param name="pos_col" value="3"/>
    <param name="base" value="1"/>
    <param name="allele_col" value="5"/>
    <param name="strand_choice" value="data_column"/>
    <param name="strand_col" value="4"/>
    <param name="output_opts" value="A"/>
    <output name="output" file="sift_variants_result.tab"/>
  </test>
</tests>
|
|
87
|
|
88 <help>
|
|
89 .. class:: warningmark
|
|
90
|
|
91 This currently works only for builds hg18 or hg19.
|
|
92
|
|
93 -----
|
|
94
|
|
95 **Dataset formats**
|
|
96
|
|
97 The input and output datasets are tabular_.
|
|
98 (`Dataset missing?`_)
|
|
99
|
|
100 .. _tabular: ./static/formatHelp.html#tab
|
|
101 .. _Dataset missing?: ./static/formatHelp.html
|
|
102
|
|
103 -----
|
|
104
|
|
105 **What it does**
|
|
106
|
|
107 SIFT predicts whether an amino-acid substitution affects protein function,
|
|
108 based on sequence homology and the physical properties of amino acids.
|
|
109 SIFT can be applied to naturally occurring non-synonymous polymorphisms
|
|
110 and laboratory-induced missense mutations. This tool uses SQLite databases
|
|
111 containing pre-computed SIFT scores and annotations for all possible nucleotide
|
|
112 substitutions at each position in the human exome. Allele frequency data
|
|
113 are from the HapMap frequency database, and additional transcript and
|
|
114 gene-level data are from Ensembl BioMart.
|
|
115
|
|
116 The input dataset must contain columns for the chromosome, position, and
|
|
117 alleles. The alleles must be two nucleotides separated by '/',
|
|
118 usually the reference allele and the allele of interest.
|
|
The strand must either be given in a separate column or be the same for every row.
|
|
120 The output contains a standard set of columns plus the additional ones that
|
|
121 have been selected from the list above.
|
|
122
|
|
123 Website: http://sift.jcvi.org/
|
|
124
|
|
125 -----
|
|
126
|
|
127 **Example**
|
|
128
|
|
- input file::

    chr3   81780820   +  T/C
    chr2   230341630  +  G/A
    chr2   43881517   +  A/T
    chr2   43857514   +  T/C
    chr6   88375602   +  G/A
    chr22  29307353   -  T/A
    chr10  115912482  -  G/T
    chr10  115900918  -  C/T
    chr16  69875502   +  G/T
    etc.
|
|
141
|
|
- output file::

    #Chrom  Position   Strand  Allele  Codons   Transcript ID    Protein ID       Substitution  Region    dbSNP ID      SNP Type       Prediction  Score  Median Info  Num seqs at position  User Comment
    chr3    81780820   +       T/C     AGA-gGA  ENST00000264326  ENSP00000264326  R190G         EXON CDS  rs2229519:C   Nonsynonymous  DAMAGING    0.04   3.06         149
    chr2    230341630  +       G/T     -        ENST00000389045  ENSP00000373697  NA            EXON CDS  rs1803846:A   Unknown        Not scored  NA     NA           NA
    chr2    43881517   +       A/T     ATA-tTA  ENST00000260605  ENSP00000260605  I230L         EXON CDS  rs11556157:T  Nonsynonymous  TOLERATED   0.47   3.19         7
    chr2    43857514   +       T/C     TTT-TcT  ENST00000260605  ENSP00000260605  F33S          EXON CDS  rs2288709:C   Nonsynonymous  TOLERATED   0.61   3.33         6
    chr6    88375602   +       G/A     GTT-aTT  ENST00000257789  ENSP00000257789  V217I         EXON CDS  rs2307389:A   Nonsynonymous  TOLERATED   0.75   3.17         13
    chr22   29307353   +       T/A     ACC-tCC  ENST00000335214  ENSP00000334612  T264S         EXON CDS  rs42942:A     Nonsynonymous  TOLERATED   0.4    3.14         23
    chr10   115912482  +       C/A     CGA-CtA  ENST00000369285  ENSP00000358291  R179L         EXON CDS  rs12782946:T  Nonsynonymous  TOLERATED   0.06   4.32         2
    chr10   115900918  +       G/A     CAA-tAA  ENST00000369287  ENSP00000358293  Q271*         EXON CDS  rs7095762:T   Nonsynonymous  N/A         N/A    N/A          N/A
    chr16   69875502   +       G/T     ACA-AaA  ENST00000338099  ENSP00000337512  T608K         EXON CDS  rs3096381:T   Nonsynonymous  TOLERATED   0.12   3.41         3
    etc.
|
|
155
|
|
156 -----
|
|
157
|
|
158 **References**
|
|
159
|
|
160 Ng PC, Henikoff S. (2001) Predicting deleterious amino acid substitutions.
|
|
161 Genome Res. 11(5):863-74.
|
|
162
|
|
163 Ng PC, Henikoff S. (2002) Accounting for human polymorphisms predicted to affect protein function.
|
|
164 Genome Res. 12(3):436-46.
|
|
165
|
|
166 Ng PC, Henikoff S. (2003) SIFT: Predicting amino acid changes that affect protein function.
|
|
167 Nucleic Acids Res. 31(13):3812-4.
|
|
168
|
|
169 Kumar P, Henikoff S, Ng PC. (2009) Predicting the effects of coding non-synonymous variants
|
|
170 on protein function using the SIFT algorithm.
|
|
171 Nat Protoc. 4(7):1073-81. Epub 2009 Jun 25.
|
|
172
|
|
173 </help>
|
|
174 </tool>
|