#!/bin/sh
## Convert a VCF file to GVF.
## NOTE: This is a very simple, basic parser for a complex format.
##
## Usage: vcf2gvf.sh <vcf file> <outputfile>

# Copyright 2012 John McCallum & Leshi Chen
# New Zealand Institute for Plant and Food Research
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#######################################
# Convert one VCF file to GVF.
# Arguments: $1 - path of the VCF file to read
#            $2 - path of the GVF file to write (created or truncated)
# Outputs:   writes the "##gvf-version 1.05" pragma to $2, followed by one
#            tab-separated record per non-comment, non-blank line of $1 and
#            a final empty line; diagnostics go to stderr
# Returns:   0 on success, non-zero if $1 is unreadable or writing $2 fails
#######################################
vcf2gvf() {
  # Fail before touching the output so a bad input path does not leave a
  # header-only file behind.
  if [ ! -r "$1" ]; then
    printf 'vcf2gvf: cannot read input file: %s\n' "$1" >&2
    return 1
  fi

  printf '%s\n' "##gvf-version 1.05" > "$2" || return

  # The records are appended (>>) so the pragma written above is kept.
  awk '
    BEGIN { OFS = "\t" }

    # Skip VCF meta/header lines and blank lines.
    /^#/ || NF == 0 { next }

    {
      # An INFO column ($8) that starts with INDEL marks an indel call;
      # every other record is reported as a SNP.
      type = (index($8, "INDEL") == 1) ? "INDEL" : "SNP"

      # A SNP spans one base; an indel spans the length of REF ($4).
      feat_length = (type == "SNP") ? 1 : length($4)

      # Coordinates are 1-based and inclusive, so a one-base feature
      # starts and ends on the same position.
      feat_end = $2 + feat_length - 1

      print $1, "SAMTOOLS", type, $2, feat_end, $6, ".", ".",
        "ID=" $1 ":SAMTOOLS:" type ":" $2 ";Variant_seq=" $5 ";Reference_seq=" $4 ";" $8
    }

    # Trailing empty line, kept from the original output format.
    END { print "" }
  ' "$1" >> "$2"
}

# Script entry point. Extra arguments beyond the first two are ignored, as
# they always have been. Misuse is reported through the status of this final
# command instead of `exit`, so sourcing the file for its vcf2gvf function
# does not terminate the calling shell.
if [ "$#" -ge 2 ]; then
  vcf2gvf "$1" "$2"
else
  printf 'usage: %s <vcf file> <outputfile>\n' "${0##*/}" >&2
  (exit 2)
fi