<tool id="hgv_codingSnps" name="aaChanges" version="1.0.0">
  <!--
    Galaxy wrapper around codingSnps.pl. It takes a SNP interval dataset, the
    column holding the SNP alleles, and a gene dataset, and writes the result
    of the script to a single output dataset.
  -->
  <description>amino-acid changes caused by a set of SNPs</description>

  <!--
    Positional arguments: SNP dataset, gene dataset, the literal word "Galaxy".
    The key=value arguments pass the build (dbkey), the location of
    codingSnps.loc, and the chromosome/start/end columns taken from the SNP
    dataset's metadata, plus the user-selected SNP column.
    Dataset paths are single-quoted so the shell always sees one word each.
  -->
  <command interpreter="perl">
    codingSnps.pl '$input1' '$input2' Galaxy build=${input1.metadata.dbkey} loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc chr=${input1.metadata.chromCol} start=${input1.metadata.startCol} end=${input1.metadata.endCol} snp=$col1 > '$out_file1'
  </command>

  <inputs>
    <!-- Both datasets are only accepted when their dbkey is listed in column 0 of codingSnps.loc. -->
    <param format="interval" name="input1" type="data" label="SNP dataset">
      <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
    </param>
    <param name="col1" type="data_column" data_ref="input1" label="Column with SNPs" />
    <param format="interval" name="input2" type="data" label="Gene dataset">
      <validator type="dataset_metadata_in_file" filename="codingSnps.loc" metadata_name="dbkey" metadata_column="0" message="Sequences are not currently available for the specified build." split="\t" />
    </param>
  </inputs>

  <outputs>
    <!-- NOTE(review): the help text and the test files describe this output as interval, but it is declared tabular; confirm which is intended. -->
    <data format="tabular" name="out_file1" />
  </outputs>

  <code file="codingSnps_filter.py" />

  <requirements>
    <requirement type="binary">cat</requirement>
    <requirement type="binary">sort</requirement>
    <requirement type="package">ucsc_tools</requirement>
  </requirements>

  <tests>
    <!-- The output name must match the <data name="out_file1"> declared above. -->
    <test>
      <param name="input1" ftype="interval" value="codingSnps_input1.interval" dbkey="hg18" />
      <param name="col1" value="6" />
      <param name="input2" ftype="interval" value="codingSnps_inputGenes1.bed" dbkey="hg18" />
      <output name="out_file1" file="codingSnps_output1.interval" />
    </test>
    <test>
      <param name="input1" ftype="interval" value="codingSnps_input2.interval" dbkey="hg18" />
      <param name="input2" ftype="interval" value="codingSnps_inputGenes2.bed" dbkey="hg18" />
      <param name="col1" value="4" />
      <output name="out_file1" file="codingSnps_output2.interval" />
    </test>
  </tests>

  <help>
.. class:: infomark

The build must be defined for the input files and must be the same for both files.
Use the pencil icon to add the build to the files if necessary.

-----

**Dataset formats**

The SNP dataset is in interval_ format, with a column of SNPs as described below.
The gene dataset is in BED_ format with 12 columns.  The output dataset is also interval.
(`Dataset missing?`_)

.. _interval: ./static/formatHelp.html#interval
.. _BED: ./static/formatHelp.html#bed
.. _Dataset missing?: ./static/formatHelp.html

-----

**What it does**

This tool identifies which SNPs create amino-acid changes in the specified
coding regions.  The first input file contains the SNPs and must be an interval file.
It needs the chromosome, start, and end position as well as the SNP.  The
SNP can be given using ambiguous-nucleotide symbols or a list of two to four
alleles
separated by '/'.  Any other columns in the first input file will not be
used but will be kept for the output.  The second input file contains the genes
to be used for defining the coding regions.  This file must be a BED file with
the first 12 columns standard BED columns.  The output is the same as the
first input file with
several columns added: the name field from the line of the gene input file
used, the amino acids, the codon number, and the reference nucleotide that
changed in the amino acid.
The amino acids are listed with the reference amino acid first, then a colon,
and then the amino acids for the alleles.  If a SNP is not in a coding region
or is synonymous then it is not included in the output file.

-----

**Example**

- first input file, with SNPs::

    chr22 15660821 15660822 A/G
    chr22 15825725 15825726 G/T
    chr22 15827035 15827036 G
    chr22 15827135 15827136 C/G
    chr22 15830928 15830929 A/G
    chr22 15830951 15830952 G
    chr22 15830955 15830956 C/T
    chr22 15848885 15848886 C/T
    chr22 15849048 15849049 A/C
    chr22 15919711 15919712 A/G
    etc.

  or, indicating polymorphisms using ambiguous-nucleotide symbols::

    chr22 15660821 15660822 R
    chr22 15825725 15825726 K
    chr22 15827035 15827036 G
    chr22 15827135 15827136 S
    chr22 15830928 15830929 R
    chr22 15830951 15830952 G
    chr22 15830955 15830956 Y
    chr22 15848885 15848886 Y
    chr22 15849048 15849049 M
    chr22 15919711 15919712 R
    etc.

- second input file, with UCSC annotations for human genes::

    chr22 15688363 15690225 uc010gqr.1 0 + 15688363 15688363 0 2 587,794, 0,1068,
    chr22 15822826 15869112 uc002zlw.1 0 - 15823622 15869004 0 10 940,105,97,91,265,86,251,208,304,282, 0,1788,2829,3241,4163,6361,8006,26023,29936,46004,
    chr22 15826991 15869112 uc010gqs.1 0 - 15829218 15869004 0 5 1380,86,157,304,282, 0,2196,21858,25771,41839,
    chr22 15897459 15919682 uc002zlx.1 0 + 15897459 15897459 0 4 775,128,103,1720, 0,8303,10754,20503,
    chr22 15945848 15971389 uc002zly.1 0 + 15945981 15970710 0 13 271,25,147,113,127,48,164,84,85,12,102,42,2193, 0,12103,12838,13816,15396,17037,17180,18535,19767,20632,20894,22768,23348,
    etc.

- output file, showing non-synonymous substitutions in coding regions::

    chr22 15825725 15825726 G/T uc002zlw.1 Gln:Pro/Gln 469 T
    chr22 15827035 15827036 G uc002zlw.1 Glu:Asp 414 C
    chr22 15827135 15827136 C/G uc002zlw.1 Gly:Gly/Ala 381 C
    chr22 15830928 15830929 A/G uc002zlw.1 Ala:Ser/Pro 281 C
    chr22 15830951 15830952 G uc002zlw.1 Leu:Pro 273 A
    chr22 15830955 15830956 C/T uc002zlw.1 Ser:Gly/Ser 272 T
    chr22 15848885 15848886 C/T uc002zlw.1 Ser:Trp/Stop 217 G
    chr22 15848885 15848886 C/T uc010gqs.1 Ser:Trp/Stop 200 G
    chr22 15849048 15849049 A/C uc002zlw.1 Gly:Stop/Gly 163 C
    etc.

  </help>
</tool>