annotate gsmapper2gff.sh @ 1:a0689dc29b7f draft

Updated vcf to gff conversion tool
author john-mccallum
date Tue, 31 Jul 2012 00:33:11 -0400
parents 21053f7f9ed1
children b321e0517be3
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
#!/bin/sh
##convert gsMapper output into gff3/GVF format

#New Zealand Institute for Plant and Food Research
#This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# Usage: gsmapper2gff.sh <infile> <outfile>
#
#   infile   whitespace-delimited text file; only records that start with ">"
#            and carry a "%" in their 7th field are converted
#   outfile  destination for the tab-separated, 9-column output records
#
# Exit status: 2 when fewer than two arguments are supplied, otherwise the
# exit status of awk.

# Fail early with a readable message instead of letting the shell trip over an
# empty redirection target.
if [ "$#" -lt 2 ]; then
    echo "Usage: $0 <infile> <outfile>" >&2
    exit 2
fi

infile=$1
outfile=$2

awk '
BEGIN { OFS = "\t" }

# Select records beginning with ">". sub() removes the first "%" from field 7
# and returns the number of replacements made, so it doubles as a filter:
# ">" records without a "%" in field 7 are skipped.
/^>/ && sub(/%/, "", $7) {
    # Sequence name is field 1 without the leading ">".
    ID = substr($1, 2)

    # Anything longer than one character, or containing "-", in either allele
    # field is reported as an indel; everything else as a SNP.
    if (length($4) > 1 || match($4, "-") || length($5) > 1 || match($5, "-"))
        type = "indel"
    else
        type = "SNP"

    start = $2
    end = $3
    Col9_ID = ID ":gsmapper:" type ":" start

    Reference_seq = $4
    Variant_seq = $5
    Total_reads = $6

    # Field 7 (now without "%") is used as a percentage of Total_reads.
    # int() truncates to a whole read count; it always yields an exact
    # integer, so the value is never printed through CONVFMT in exponent form.
    # NOTE(review): if the percentage in the input is itself rounded, this
    # truncation can undercount by one (e.g. 3 reads at 33 gives 0) - confirm
    # whether rounding to nearest is wanted.
    Variant_reads = int(Total_reads * $7 / 100)

    attributes = "ID=" Col9_ID \
        ";Reference_seq=" Reference_seq \
        ";Variant_seq=" Variant_seq \
        ";Total_reads=" Total_reads \
        ";Variant_reads=" Variant_reads

    # Columns 6-8 are emitted as "." placeholders.
    print ID, "gsmapper", type, start, end, ".", ".", ".", attributes
}' "$infile" > "$outfile"
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
43
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
44
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
45
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
46
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
47
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
48
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
49
21053f7f9ed1 First upload of PCR Marker tools
john-mccallum
parents:
diff changeset
50