annotate tools/human_genome_variation/sift.xml @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="hgv_sift" name="SIFT" version="1.0.0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>predictions of functional sites</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Runs the bash wrapper script with eleven positional arguments, in this order:
     input dataset, output dataset, the input's dbkey metadata, path to sift_db.loc,
     chromosome column, position column, coordinate base, allele column,
     strand (column or literal), comment (column or literal), selected output options. -->
4 <command interpreter="bash">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 sift_variants_wrapper.sh "$input" "$output" "${input.metadata.dbkey}" "${GALAXY_DATA_INDEX_DIR}/sift_db.loc" "$chrom_col" "$pos_col" "$base" "$allele_col" "$strand_source.strand_col" "$comment_source.comment_col" "$output_opts"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Tabular input dataset. Two validators constrain its genome build:
     "unspecified_build" presumably rejects datasets whose build is not set
     (confirm against the Galaxy validator documentation), and
     "dataset_metadata_in_file" checks the dataset's dbkey metadata against
     column 0 of sift_db.loc, showing the given message when the check fails. -->
9 <param name="input" type="data" format="tabular" label="Dataset">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 <validator type="unspecified_build"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 <validator type="dataset_metadata_in_file" filename="sift_db.loc" metadata_name="dbkey" metadata_column="0" message="Data is currently not available for the specified build."/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Column selectors tied to the "input" dataset via data_ref, plus the
     coordinate base of the position column. -->
<!-- Column holding the chromosome name. -->
13 <param name="chrom_col" type="data_column" data_ref="input" label="Column with chromosome"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Column holding the position; declared numerical. -->
14 <param name="pos_col" type="data_column" data_ref="input" numerical="true" label="Column with position"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Whether positions are one-based (value "1", the default) or zero-based (value "0"). -->
15 <param name="base" type="select" label="Position coordinates are">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <option value="1" selected="true">one-based</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 <option value="0">zero-based</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Column holding the alleles. -->
19 <param name="allele_col" type="data_column" data_ref="input" label="Column with allele"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Strand source. Every branch defines a parameter named "strand_col", so the
     command line can always reference $strand_source.strand_col: it is a column
     of the input when "data_column" is chosen, or the hidden literal "+" / "-"
     when all rows share one strand. -->
20 <conditional name="strand_source">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 <param name="strand_choice" type="select" label="Strand info">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <option value="data_column" selected="true">a column in the dataset</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 <option value="all_pos">all on sense/forward/+ strand</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <option value="all_neg">all on antisense/reverse/- strand</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Strand is read per row from a user-selected column. -->
26 <when value="data_column">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <param name="strand_col" type="data_column" data_ref="input" label="Column with strand"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- All rows on the forward strand: pass the literal "+". -->
29 <when value="all_pos">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <param name="strand_col" type="hidden" value="+"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- All rows on the reverse strand: pass the literal "-". -->
32 <when value="all_neg">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <param name="strand_col" type="hidden" value="-"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Optional comment column. Both branches define "comment_col", so the command
     line can always reference $comment_source.comment_col: the hidden literal "-"
     when no comment column is wanted (the default), otherwise a column of the input. -->
36 <conditional name="comment_source">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 <param name="comment_choice" type="select" label="Include comment column">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <option value="no" selected="true">no</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <option value="yes">yes</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- No comment column: pass the literal "-". -->
41 <when value="no">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 <param name="comment_col" type="hidden" value="-"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Comment text is taken from a user-selected column. -->
44 <when value="yes">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <param name="comment_col" type="data_column" data_ref="input" label="Column with comment"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 </when>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 </conditional>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Optional extra output fields, shown as checkboxes (multiple selection allowed).
     Each option is identified by a single-letter code, A through L; the selection
     is passed to the wrapper as the last argument ("$output_opts"). None of the
     options is marked as selected by default. -->
48 <param name="output_opts" type="select" multiple="true" display="checkboxes" label="Include the following additional fields in the output">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 <option value="A">Ensembl Gene ID</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 <option value="B">Gene Name</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 <option value="C">Gene Description</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 <option value="D">Ensembl Protein Family ID</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 <option value="E">Ensembl Protein Family Description</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 <option value="F">Ensembl Transcript Status (Known / Novel)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 <option value="G">Protein Family Size</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 <option value="H">Ka/Ks (Human-mouse)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 <option value="I">Ka/Ks (Human-macaque)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 <option value="J">OMIM Disease</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 <option value="K">Allele Frequencies (All Hapmap Populations - weighted average)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 <option value="L">Allele Frequencies (CEU Hapmap population)</option>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- Single tabular output dataset; it is the "$output" argument on the command line. -->
64 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 <data format="tabular" name="output" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- External binaries declared as requirements: awk, rm and sed.
     NOTE(review): presumably these are what the wrapper script calls; the script
     itself is not part of this file, so confirm the list against it. -->
68 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 <requirement type="binary">awk</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 <requirement type="binary">rm</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 <requirement type="binary">sed</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
<!-- One functional test: runs the tool on sift_variants.tab (tabular, build hg18)
     with chromosome, position, strand and allele in columns 1, 3, 4 and 5,
     one-based coordinates, and extra output field A, then compares the result
     with sift_variants_result.tab. comment_choice is not set here, so it keeps
     its default option ("no"). -->
74 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 <param name="input" value="sift_variants.tab" ftype="tabular" dbkey="hg18"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 <param name="chrom_col" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 <param name="pos_col" value="3"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 <param name="base" value="1"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 <param name="allele_col" value="5"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 <param name="strand_choice" value="data_column"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 <param name="strand_col" value="4"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 <param name="output_opts" value="A"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 <output name="output" file="sift_variants_result.tab"/>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 .. class:: warningmark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 This currently works only for builds hg18 or hg19.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 **Dataset formats**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 The input and output datasets are tabular_.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 (`Dataset missing?`_)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 .. _tabular: ./static/formatHelp.html#tab
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 .. _Dataset missing?: ./static/formatHelp.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 SIFT predicts whether an amino-acid substitution affects protein function,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 based on sequence homology and the physical properties of amino acids.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 SIFT can be applied to naturally occurring non-synonymous polymorphisms
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 and laboratory-induced missense mutations. This tool uses SQLite databases
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 containing pre-computed SIFT scores and annotations for all possible nucleotide
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 substitutions at each position in the human exome. Allele frequency data
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 are from the HapMap frequency database, and additional transcript and
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 gene-level data are from Ensembl BioMart.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 The input dataset must contain columns for the chromosome, position, and
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 alleles. The alleles must be two nucleotides separated by '/',
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 usually the reference allele and the allele of interest.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 The strand must either be given in a separate column or be the same for every row.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 The output contains a standard set of columns plus the additional ones that
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 have been selected from the list above.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 Website: http://sift.jcvi.org/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 - input file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 chr3 81780820 + T/C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 chr2 230341630 + G/A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 chr2 43881517 + A/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 chr2 43857514 + T/C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 chr6 88375602 + G/A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 chr22 29307353 - T/A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 chr10 115912482 - G/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 chr10 115900918 - C/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 chr16 69875502 + G/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 - output file::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 #Chrom Position Strand Allele Codons Transcript ID Protein ID Substitution Region dbSNP ID SNP Type Prediction Score Median Info Num seqs at position User Comment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 chr3 81780820 + T/C AGA-gGA ENST00000264326 ENSP00000264326 R190G EXON CDS rs2229519:C Nonsynonymous DAMAGING 0.04 3.06 149
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 chr2 230341630 + G/T - ENST00000389045 ENSP00000373697 NA EXON CDS rs1803846:A Unknown Not scored NA NA NA
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 chr2 43881517 + A/T ATA-tTA ENST00000260605 ENSP00000260605 I230L EXON CDS rs11556157:T Nonsynonymous TOLERATED 0.47 3.19 7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 chr2 43857514 + T/C TTT-TcT ENST00000260605 ENSP00000260605 F33S EXON CDS rs2288709:C Nonsynonymous TOLERATED 0.61 3.33 6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 chr6 88375602 + G/A GTT-aTT ENST00000257789 ENSP00000257789 V217I EXON CDS rs2307389:A Nonsynonymous TOLERATED 0.75 3.17 13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 chr22 29307353 + T/A ACC-tCC ENST00000335214 ENSP00000334612 T264S EXON CDS rs42942:A Nonsynonymous TOLERATED 0.4 3.14 23
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 chr10 115912482 + C/A CGA-CtA ENST00000369285 ENSP00000358291 R179L EXON CDS rs12782946:T Nonsynonymous TOLERATED 0.06 4.32 2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 chr10 115900918 + G/A CAA-tAA ENST00000369287 ENSP00000358293 Q271* EXON CDS rs7095762:T Nonsynonymous N/A N/A N/A N/A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 chr16 69875502 + G/T ACA-AaA ENST00000338099 ENSP00000337512 T608K EXON CDS rs3096381:T Nonsynonymous TOLERATED 0.12 3.41 3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 **References**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 Ng PC, Henikoff S. (2001) Predicting deleterious amino acid substitutions.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 Genome Res. 11(5):863-74.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 Ng PC, Henikoff S. (2002) Accounting for human polymorphisms predicted to affect protein function.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 Genome Res. 12(3):436-46.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 Ng PC, Henikoff S. (2003) SIFT: Predicting amino acid changes that affect protein function.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 Nucleic Acids Res. 31(13):3812-4.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 Kumar P, Henikoff S, Ng PC. (2009) Predicting the effects of coding non-synonymous variants
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 on protein function using the SIFT algorithm.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 Nat Protoc. 4(7):1073-81. Epub 2009 Jun 25.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 </tool>