#!/bin/sh
## Convert VCF to GVF.
## NOTE: This is a very simple, basic parser for a complex format.
# It is intended for use with mpileup output where the -g or -u flags are NOT used.

## Usage: vcf2gvf.sh <vcf file> <outputfile>

# Copyright 2012 John McCallum & Leshi Chen
# New Zealand Institute for Plant and Food Research
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

#######################################
# Print the command-line synopsis to stdout.
#######################################
usage() {
  printf 'usage: %s <vcf file> <outputfile>\n' "${0##*/}"
}

#######################################
# Convert a VCF file into a GVF file.
# Arguments:
#   $1 - path of the VCF file to read ("-" is passed through to awk)
#   $2 - path of the GVF file to create (overwritten if it exists)
# Outputs:
#   Writes the "##gvf-version 1.05" pragma, one 9-column tab-separated
#   record per VCF data line, and a final blank line to $2.
# Returns:
#   0 on success, 1 if the input is unreadable, otherwise the status of
#   awk or of the failed output redirection.
#######################################
vcf2gvf() {
  # Check the input first so a bad path does not leave a header-only file.
  if [ "$1" != "-" ] && [ ! -r "$1" ]; then
    printf 'vcf2gvf: cannot read input file: %s\n' "$1" >&2
    return 1
  fi

  # Header and records share ONE redirection. Redirecting them separately
  # with ">" makes the second redirection truncate away the header.
  {
    printf '%s\n' '##gvf-version 1.05'

    awk '
      BEGIN { OFS = "\t" }

      # Skip VCF meta/header lines and blank lines.
      /^#/ || NF == 0 { next }

      {
        # A record is treated as an indel when its INFO column starts
        # with INDEL; everything else is reported as a SNP.
        if (index($8, "INDEL") == 1) {
          type = "INDEL"
          feat_length = length($4)   # span of the REF allele
        } else {
          type = "SNP"
          feat_length = 1
        }

        # GVF/GFF3 coordinates are 1-based and inclusive, so a one-base
        # feature has end equal to start.
        end = $2 + feat_length - 1

        # Columns: seqid, source, type, start, end, score (QUAL),
        # strand, phase, attributes (ID, alleles, then the VCF INFO).
        print $1, "SAMTOOLS", type, $2, end, $6, ".", ".",
          "ID=" $1 ":SAMTOOLS:" type ":" $2 ";Variant_seq=" $5 ";Reference_seq=" $4 ";" $8
      }

      # Trailing blank line kept so the end of the output is unchanged.
      END { print "" }
    ' "$1"
  } > "$2"
}

#######################################
# Entry point: validate the argument count and run the conversion.
# Arguments:
#   $1 - VCF input file
#   $2 - GVF output file
# Returns:
#   2 on a usage error, otherwise the status of vcf2gvf.
#######################################
main() {
  if [ "$#" -ne 2 ]; then
    usage >&2
    return 2
  fi
  vcf2gvf "$1" "$2"
}

# Must stay the last command: its status becomes the exit status of the script.
main "$@"