annotate tools/evolution/codingSnps.xml @ 2:c2a356708570

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:42 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 <tool id="hgv_codingSnps" name="aaChanges" version="1.0.0">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 <description>amino-acid changes caused by a set of SNPs</description>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 <command interpreter="perl">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 codingSnps.pl '$input1' '$input2' Galaxy build=${input1.metadata.dbkey} loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc chr=${input1.metadata.chromCol} start=${input1.metadata.startCol} end=${input1.metadata.endCol} snp=$col1 > '$out_file1'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 <inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 <param format="interval" name="input1" type="data" label="SNP dataset">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 <param name="col1" type="data_column" data_ref="input1" label="Column with SNPs" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 <param format="interval" name="input2" type="data" label="Gene dataset">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15 </param>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 </inputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 <outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 <data format="tabular" name="out_file1" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 </outputs>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 <code file="codingSnps_filter.py"></code>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 <requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 <requirement type="binary">cat</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 <requirement type="binary">sort</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 <requirement type="package">ucsc_tools</requirement>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 </requirements>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 <tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 <param name="input1" ftype="interval" value="codingSnps_input1.interval" dbkey="hg18" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 <param name="col1" value="6" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 <param name="input2" ftype="interval" value="codingSnps_inputGenes1.bed" dbkey="hg18" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 <output name="out_file1" file="codingSnps_output1.interval" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 <test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 <param name="input1" ftype="interval" value="codingSnps_input2.interval" dbkey="hg18" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 <param name="input2" ftype="interval" value="codingSnps_inputGenes2.bed" dbkey="hg18" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 <param name="col1" value="4" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 <output name="out_file1" file="codingSnps_output2.interval" />
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 </test>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 </tests>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 <help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 .. class:: infomark
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 The build must be defined for the input files and must be the same for both files.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 Use the pencil icon to add the build to the files if necessary.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 **Dataset formats**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 The SNP dataset is in interval_ format, with a column of SNPs as described below.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 The gene dataset is in BED_ format with 12 columns. The output dataset is also interval.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 (`Dataset missing?`_)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 .. _interval: ./static/formatHelp.html#interval
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 .. _BED: ./static/formatHelp.html#bed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 .. _Dataset missing?: ./static/formatHelp.html
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 **What it does**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 This tool identifies which SNPs create amino-acid changes in the specified
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 coding regions. The first input file contains the SNPs and must be an interval file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 It needs the chromosome, start, and end position as well as the SNP. The
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 SNP can be given using ambiguous-nucleotide symbols or a list of two to four
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 alleles
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 separated by '/'. Any other columns in the first input file will not be
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 used but will be kept for the output. The second input file contains the genes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 to be used for defining the coding regions. This file must be a BED file with
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 its first 12 columns being the standard BED columns. The output is the same as the
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 first input file with
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 several columns added: the name field from the line of the gene input file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 used, the amino acids, the codon number, and the reference nucleotide that
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 is changed in that amino acid's codon.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 The amino acids are listed with the reference amino acid first, then a colon,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 and then the amino acids for the alleles. If a SNP is not in a coding region
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 or is synonymous then it is not included in the output file.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 -----
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 **Example**
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 - first input file, with SNPs::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 chr22 15660821 15660822 A/G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 chr22 15825725 15825726 G/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 chr22 15827035 15827036 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 chr22 15827135 15827136 C/G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 chr22 15830928 15830929 A/G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 chr22 15830951 15830952 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 chr22 15830955 15830956 C/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 chr22 15848885 15848886 C/T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 chr22 15849048 15849049 A/C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 chr22 15919711 15919712 A/G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 or, indicating polymorphisms using ambiguous-nucleotide symbols::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 chr22 15660821 15660822 R
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 chr22 15825725 15825726 K
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 chr22 15827035 15827036 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 chr22 15827135 15827136 S
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 chr22 15830928 15830929 R
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 chr22 15830951 15830952 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 chr22 15830955 15830956 Y
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 chr22 15848885 15848886 Y
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 chr22 15849048 15849049 M
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 chr22 15919711 15919712 R
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 - second input file, with UCSC annotations for human genes::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 chr22 15688363 15690225 uc010gqr.1 0 + 15688363 15688363 0 2 587,794, 0,1068,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 chr22 15822826 15869112 uc002zlw.1 0 - 15823622 15869004 0 10 940,105,97,91,265,86,251,208,304,282, 0,1788,2829,3241,4163,6361,8006,26023,29936,46004,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 chr22 15826991 15869112 uc010gqs.1 0 - 15829218 15869004 0 5 1380,86,157,304,282, 0,2196,21858,25771,41839,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 chr22 15897459 15919682 uc002zlx.1 0 + 15897459 15897459 0 4 775,128,103,1720, 0,8303,10754,20503,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 chr22 15945848 15971389 uc002zly.1 0 + 15945981 15970710 0 13 271,25,147,113,127,48,164,84,85,12,102,42,2193, 0,12103,12838,13816,15396,17037,17180,18535,19767,20632,20894,22768,23348,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 - output file, showing non-synonymous substitutions in coding regions::
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 chr22 15825725 15825726 G/T uc002zlw.1 Gln:Pro/Gln 469 T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 chr22 15827035 15827036 G uc002zlw.1 Glu:Asp 414 C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 chr22 15827135 15827136 C/G uc002zlw.1 Gly:Gly/Ala 381 C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 chr22 15830928 15830929 A/G uc002zlw.1 Ala:Ser/Pro 281 C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 chr22 15830951 15830952 G uc002zlw.1 Leu:Pro 273 A
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 chr22 15830955 15830956 C/T uc002zlw.1 Ser:Gly/Ser 272 T
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 chr22 15848885 15848886 C/T uc002zlw.1 Ser:Trp/Stop 217 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 chr22 15848885 15848886 C/T uc010gqs.1 Ser:Trp/Stop 200 G
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 chr22 15849048 15849049 A/C uc002zlw.1 Gly:Stop/Gly 163 C
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 etc.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 </help>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 </tool>