diff PsiCLASS-1.0.2/samtools-0.1.19/bcftools/vcfutils.pl @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PsiCLASS-1.0.2/samtools-0.1.19/bcftools/vcfutils.pl	Fri Mar 26 16:52:45 2021 +0000
@@ -0,0 +1,567 @@
+#!/usr/bin/perl -w
+
+# Author: lh3
+
+use strict;
+use warnings;
+use Getopt::Std;
+
+&main;
+exit;
+
+sub main {
+  &usage if (@ARGV < 1);
+  my $command = shift(@ARGV);
+  my %func = (subsam=>\&subsam, listsam=>\&listsam, fillac=>\&fillac, qstats=>\&qstats, varFilter=>\&varFilter,
+			  hapmap2vcf=>\&hapmap2vcf, ucscsnp2vcf=>\&ucscsnp2vcf, filter4vcf=>\&varFilter, ldstats=>\&ldstats,
+			  gapstats=>\&gapstats, splitchr=>\&splitchr, vcf2fq=>\&vcf2fq);
+  die("Unknown command \"$command\".\n") if (!defined($func{$command}));
+  &{$func{$command}};
+}
+
+sub splitchr {
+  my %opts = (l=>5000000);
+  getopts('l:', \%opts);
+  my $l = $opts{l};
+  die(qq/Usage: vcfutils.pl splitchr [-l $opts{l}] <in.fa.fai>\n/) if (@ARGV == 0 && -t STDIN);
+  while (<>) {
+	my @t = split;
+	my $last = 0;
+	for (my $i = 0; $i < $t[1];) {
+	  my $e = ($t[1] - $i) / $l < 1.1? $t[1] : $i + $l;
+	  print "$t[0]:".($i+1)."-$e\n";
+	  $i = $e;
+	}
+  }
+}
+
+sub subsam {
+  die(qq/Usage: vcfutils.pl subsam <in.vcf> [samples]\n/) if (@ARGV == 0);
+  my ($fh, %h);
+  my $fn = shift(@ARGV);
+  my @col;
+  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
+  $h{$_} = 1 for (@ARGV);
+  while (<$fh>) {
+	if (/^##/) {
+	  print;
+	} elsif (/^#/) {
+	  my @t = split;
+	  my @s = @t[0..8]; # all fixed fields + FORMAT
+	  for (9 .. $#t) {
+		if ($h{$t[$_]}) {
+		  push(@s, $t[$_]);
+		  push(@col, $_);
+		}
+	  }
+	  pop(@s) if (@s == 9); # no sample selected; remove the FORMAT field
+	  print join("\t", @s), "\n";
+	} else {
+	  my @t = split;
+	  if (@col == 0) {
+		print join("\t", @t[0..7]), "\n";
+	  } else {
+		print join("\t", @t[0..8], map {$t[$_]} @col), "\n";
+	  }
+	}
+  }
+  close($fh);
+}
+
+sub listsam {
+  die(qq/Usage: vcfutils.pl listsam <in.vcf>\n/) if (@ARGV == 0 && -t STDIN);
+  while (<>) {
+	if (/^#/ && !/^##/) {
+	  my @t = split;
+	  print join("\n", @t[9..$#t]), "\n";
+	  exit;
+	}
+  }
+}
+
+sub fillac {
+  die(qq/Usage: vcfutils.pl fillac <in.vcf>\n\nNote: The GT field MUST BE present and always appear as the first field.\n/) if (@ARGV == 0 && -t STDIN);
+  while (<>) {
+	if (/^#/) {
+	  print;
+	} else {
+	  my @t = split;
+	  my @c = (0, 0);
+	  my $n = 0;
+	  my $s = -1;
+	  @_ = split(":", $t[8]);
+	  for (0 .. $#_) {
+		if ($_[$_] eq 'GT') { $s = $_; last; }
+	  }
+	  if ($s < 0) {
+		print join("\t", @t), "\n";
+		next;
+	  }
+	  for (9 .. $#t) {
+		if ($t[$_] =~ /^0,0,0/) {
+		} elsif ($t[$_] =~ /^([^\s:]+:){$s}(\d+).(\d+)/) {
+		  ++$c[$2]; ++$c[$3];
+		  $n += 2;
+		}
+	  }
+	  my $AC = "AC=" . join("\t", @c[1..$#c]) . ";AN=$n";
+	  my $info = $t[7];
+	  $info =~ s/(;?)AC=(\d+)//;
+	  $info =~ s/(;?)AN=(\d+)//;
+	  if ($info eq '.') {
+		$info = $AC;
+	  } else {
+		$info .= ";$AC";
+	  }
+	  $t[7] = $info;
+	  print join("\t", @t), "\n";
+	}
+  }
+}
+
+sub ldstats {
+  my %opts = (t=>0.9);
+  getopts('t:', \%opts);
+  die("Usage: vcfutils.pl ldstats [-t $opts{t}] <in.vcf>\n") if (@ARGV == 0 && -t STDIN);
+  my $cutoff = $opts{t};
+  my ($last, $lastchr) = (0x7fffffff, '');
+  my ($x, $y, $n) = (0, 0, 0);
+  while (<>) {
+	if (/^([^#\s]+)\s(\d+)/) {
+	  my ($chr, $pos) = ($1, $2);
+	  if (/NEIR=([\d\.]+)/) {
+		++$n;
+		++$y, $x += $pos - $last if ($lastchr eq $chr && $pos > $last && $1 > $cutoff);
+	  }
+	  $last = $pos; $lastchr = $chr;
+	}
+  }
+  print "Number of SNP intervals in strong LD (r > $opts{t}): $y\n";
+  print "Fraction: ", $y/$n, "\n";
+  print "Length: $x\n";
+}
+
+sub qstats {
+  my %opts = (r=>'', s=>0.02, v=>undef);
+  getopts('r:s:v', \%opts);
+  die("Usage: vcfutils.pl qstats [-r ref.vcf] <in.vcf>\n
+Note: This command discards indels. Output: QUAL #non-indel #SNPs #transitions #joint ts/tv #joint/#ref #joint/#non-indel \n") if (@ARGV == 0 && -t STDIN);
+  my %ts = (AG=>1, GA=>1, CT=>1, TC=>1);
+  my %h = ();
+  my $is_vcf = defined($opts{v})? 1 : 0;
+  if ($opts{r}) { # read the reference positions
+	my $fh;
+	open($fh, $opts{r}) || die;
+	while (<$fh>) {
+	  next if (/^#/);
+	  if ($is_vcf) {
+		my @t = split;
+		$h{$t[0],$t[1]} = $t[4];
+	  } else {
+		$h{$1,$2} = 1 if (/^(\S+)\s+(\d+)/);
+	  }
+	}
+	close($fh);
+  }
+  my $hsize = scalar(keys %h);
+  my @a;
+  while (<>) {
+	next if (/^#/);
+	my @t = split;
+	next if (length($t[3]) != 1 || uc($t[3]) eq 'N');
+	$t[3] = uc($t[3]); $t[4] = uc($t[4]);
+	my @s = split(',', $t[4]);
+	$t[5] = 3 if ($t[5] eq '.' || $t[5] < 0);
+	next if (length($s[0]) != 1);
+	my $hit;
+	if ($is_vcf) {
+	  $hit = 0;
+	  my $aa = $h{$t[0],$t[1]};
+	  if (defined($aa)) {
+		my @aaa = split(",", $aa);
+		for (@aaa) {
+		  $hit = 1 if ($_ eq $s[0]);
+		}
+	  }
+	} else {
+	  $hit = defined($h{$t[0],$t[1]})? 1 : 0;
+	}
+	push(@a, [$t[5], ($t[4] eq '.' || $t[4] eq $t[3])? 0 : 1, $ts{$t[3].$s[0]}? 1 : 0, $hit]);
+  }
+  push(@a, [-1, 0, 0, 0]); # end marker
+  die("[qstats] No SNP data!\n") if (@a == 0);
+  @a = sort {$b->[0]<=>$a->[0]} @a;
+  my $next = $opts{s};
+  my $last = $a[0];
+  my @c = (0, 0, 0, 0);
+  my @lc;
+  $lc[1] = $lc[2] = 0;
+  for my $p (@a) {
+	if ($p->[0] == -1 || ($p->[0] != $last && $c[0]/@a > $next)) {
+	  my @x;
+	  $x[0] = sprintf("%.4f", $c[1]-$c[2]? $c[2] / ($c[1] - $c[2]) : 100);
+	  $x[1] = sprintf("%.4f", $hsize? $c[3] / $hsize : 0);
+	  $x[2] = sprintf("%.4f", $c[3] / $c[1]);
+	  my $a = $c[1] - $lc[1];
+	  my $b = $c[2] - $lc[2];
+	  $x[3] = sprintf("%.4f", $a-$b? $b / ($a-$b) : 100);
+	  print join("\t", $last, @c, @x), "\n";
+	  $next = $c[0]/@a + $opts{s};
+	  $lc[1] = $c[1]; $lc[2] = $c[2];
+	}
+	++$c[0]; $c[1] += $p->[1]; $c[2] += $p->[2]; $c[3] += $p->[3];
+	$last = $p->[0];
+  }
+}
+
+sub varFilter {
+  my %opts = (d=>2, D=>10000000, a=>2, W=>10, Q=>10, w=>3, p=>undef, 1=>1e-4, 2=>1e-100, 3=>0, 4=>1e-4, G=>0, S=>1000, e=>1e-4);
+  getopts('pd:D:W:Q:w:a:1:2:3:4:G:S:e:', \%opts);
+  die(qq/
+Usage:   vcfutils.pl varFilter [options] <in.vcf>
+
+Options: -Q INT    minimum RMS mapping quality for SNPs [$opts{Q}]
+         -d INT    minimum read depth [$opts{d}]
+         -D INT    maximum read depth [$opts{D}]
+         -a INT    minimum number of alternate bases [$opts{a}]
+         -w INT    SNP within INT bp around a gap to be filtered [$opts{w}]
+         -W INT    window size for filtering adjacent gaps [$opts{W}]
+         -1 FLOAT  min P-value for strand bias (given PV4) [$opts{1}]
+         -2 FLOAT  min P-value for baseQ bias [$opts{2}]
+         -3 FLOAT  min P-value for mapQ bias [$opts{3}]
+         -4 FLOAT  min P-value for end distance bias [$opts{4}]
+		 -e FLOAT  min P-value for HWE (plus F<0) [$opts{e}]
+         -p        print filtered variants
+
+Note: Some of the filters rely on annotations generated by SAMtools\/BCFtools.
+\n/) if (@ARGV == 0 && -t STDIN);
+
+  # calculate the window size
+  my ($ol, $ow) = ($opts{W}, $opts{w});
+  my $max_dist = $ol > $ow? $ol : $ow;
+  # the core loop
+  my @staging; # (indel_filtering_score, flt_tag, indel_span; chr, pos, ...)
+  while (<>) {
+	my @t = split;
+    if (/^#/) {
+	  print; next;
+	}
+	next if ($t[4] eq '.'); # skip non-var sites
+    next if ($t[3] eq 'N'); # skip sites with unknown ref ('N')
+	# check if the site is a SNP
+	my $type = 1; # SNP
+	if (length($t[3]) > 1) {
+	  $type = 2; # MNP
+	  my @s = split(',', $t[4]);
+	  for (@s) {
+		$type = 3 if (length != length($t[3]));
+	  }
+	} else {
+	  my @s = split(',', $t[4]);
+	  for (@s) {
+		$type = 3 if (length > 1);
+	  }
+	}
+	# clear the out-of-range elements
+	while (@staging) {
+      # Still on the same chromosome and the first element's window still affects this position?
+	  last if ($staging[0][3] eq $t[0] && $staging[0][4] + $staging[0][2] + $max_dist >= $t[1]);
+	  varFilter_aux(shift(@staging), $opts{p}); # calling a function is a bit slower, not much
+	}
+	my $flt = 0;
+	# parse annotations
+	my ($dp, $mq, $dp_alt) = (-1, -1, -1);
+	if ($t[7] =~ /DP4=(\d+),(\d+),(\d+),(\d+)/i) {
+	  $dp = $1 + $2 + $3 + $4;
+	  $dp_alt = $3 + $4;
+	}
+	if ($t[7] =~ /DP=(\d+)/i) {
+	  $dp = $1;
+	}
+	$mq = $1 if ($t[7] =~ /MQ=(\d+)/i);
+	# the depth and mapQ filter
+	if ($dp >= 0) {
+	  if ($dp < $opts{d}) {
+		$flt = 2;
+	  } elsif ($dp > $opts{D}) {
+		$flt = 3;
+	  }
+	}
+	$flt = 4 if ($dp_alt >= 0 && $dp_alt < $opts{a});
+	$flt = 1 if ($flt == 0 && $mq >= 0 && $mq < $opts{Q});
+	$flt = 7 if ($flt == 0 && /PV4=([^,]+),([^,]+),([^,]+),([^,;\t]+)/
+				 && ($1<$opts{1} || $2<$opts{2} || $3<$opts{3} || $4<$opts{4}));
+	$flt = 8 if ($flt == 0 && ((/MXGQ=(\d+)/ && $1 < $opts{G}) || (/MXSP=(\d+)/ && $1 >= $opts{S})));
+	# HWE filter
+	if ($t[7] =~ /G3=([^;,]+),([^;,]+),([^;,]+).*HWE=([^;,]+)/ && $4 < $opts{e}) {
+		my $p = 2*$1 + $2;
+		my $f = ($p > 0 && $p < 1)? 1 - $2 / ($p * (1-$p)) : 0;
+		$flt = 9 if ($f < 0);
+	}
+
+	my $score = $t[5] * 100 + $dp_alt;
+	my $rlen = length($t[3]) - 1; # $indel_score<0 for SNPs
+	if ($flt == 0) {
+	  if ($type == 3) { # an indel
+		# filtering SNPs and MNPs
+		for my $x (@staging) {
+		  next if (($x->[0]&3) == 3 || $x->[1] || $x->[4] + $x->[2] + $ow < $t[1]);
+		  $x->[1] = 5;
+		}
+		# check the staging list for indel filtering
+		for my $x (@staging) {
+		  next if (($x->[0]&3) != 3 || $x->[1] || $x->[4] + $x->[2] + $ol < $t[1]);
+		  if ($x->[0]>>2 < $score) {
+			$x->[1] = 6;
+		  } else {
+			$flt = 6; last;
+		  }
+		}
+	  } else { # SNP or MNP
+		for my $x (@staging) {
+		  next if (($x->[0]&3) != 3 || $x->[4] + $x->[2] + $ow < $t[1]);
+		  if ($x->[4] + length($x->[7]) - 1 == $t[1] && substr($x->[7], -1, 1) eq substr($t[4], 0, 1)
+			  && length($x->[7]) - length($x->[6]) == 1) {
+			$x->[1] = 5;
+		  } else { $flt = 5; }
+		  last;
+		}
+		# check MNP
+		for my $x (@staging) {
+		  next if (($x->[0]&3) == 3 || $x->[4] + $x->[2] < $t[1]);
+		  if ($x->[0]>>2 < $score) {
+			$x->[1] = 8;
+		  } else {
+			$flt = 8; last;
+		  }
+		}
+	  }
+	}
+	push(@staging, [$score<<2|$type, $flt, $rlen, @t]);
+  }
+  # output the last few elements in the staging list
+  while (@staging) {
+	varFilter_aux(shift @staging, $opts{p});
+  }
+}
+
+sub varFilter_aux {
+  my ($first, $is_print) = @_;
+  if ($first->[1] == 0) {
+	print join("\t", @$first[3 .. @$first-1]), "\n";
+  } elsif ($is_print) {
+	print STDERR join("\t", substr("UQdDaGgPMS", $first->[1], 1), @$first[3 .. @$first-1]), "\n";
+  }
+}
+
+sub gapstats {
+  my (@c0, @c1);
+  $c0[$_] = $c1[$_] = 0 for (0 .. 10000);
+  while (<>) {
+	next if (/^#/);
+	my @t = split;
+	next if (length($t[3]) == 1 && $t[4] =~ /^[A-Za-z](,[A-Za-z])*$/); # not an indel
+	my @s = split(',', $t[4]);
+	for my $x (@s) {
+	  my $l = length($x) - length($t[3]) + 5000;
+	  if ($x =~ /^-/) {
+		$l = -(length($x) - 1) + 5000;
+	  } elsif ($x =~ /^\+/) {
+		$l = length($x) - 1 + 5000;
+	  }
+	  $c0[$l] += 1 / @s;
+	}
+  }
+  for (my $i = 0; $i < 10000; ++$i) {
+	next if ($c0[$i] == 0);
+	$c1[0] += $c0[$i];
+	$c1[1] += $c0[$i] if (($i-5000)%3 == 0);
+	printf("C\t%d\t%.2f\n", ($i-5000), $c0[$i]);
+  }
+  printf("3\t%d\t%d\t%.3f\n", $c1[0], $c1[1], $c1[1]/$c1[0]);
+}
+
+sub ucscsnp2vcf {
+  die("Usage: vcfutils.pl <in.ucsc.snp>\n") if (@ARGV == 0 && -t STDIN);
+  print "##fileformat=VCFv4.0\n";
+  print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"), "\n";
+  while (<>) {
+	my @t = split("\t");
+	my $indel = ($t[9] =~ /^[ACGT](\/[ACGT])+$/)? 0 : 1;
+	my $pos = $t[2] + 1;
+	my @alt;
+	push(@alt, $t[7]);
+	if ($t[6] eq '-') {
+	  $t[9] = reverse($t[9]);
+	  $t[9] =~ tr/ACGTRYMKWSNacgtrymkwsn/TGCAYRKMWSNtgcayrkmwsn/;
+	}
+	my @a = split("/", $t[9]);
+	for (@a) {
+	  push(@alt, $_) if ($_ ne $alt[0]);
+	}
+	if ($indel) {
+	  --$pos;
+	  for (0 .. $#alt) {
+		$alt[$_] =~ tr/-//d;
+		$alt[$_] = "N$alt[$_]";
+	  }
+	}
+	my $ref = shift(@alt);
+	my $af = $t[13] > 0? ";AF=$t[13]" : '';
+	my $valid = ($t[12] eq 'unknown')? '' : ";valid=$t[12]";
+	my $info = "molType=$t[10];class=$t[11]$valid$af";
+	print join("\t", $t[1], $pos, $t[4], $ref, join(",", @alt), 0, '.', $info), "\n";
+  }
+}
+
+sub hapmap2vcf {
+  die("Usage: vcfutils.pl <in.ucsc.snp> <in.hapmap>\n") if (@ARGV == 0);
+  my $fn = shift(@ARGV);
+  # parse UCSC SNP
+  warn("Parsing UCSC SNPs...\n");
+  my ($fh, %map);
+  open($fh, ($fn =~ /\.gz$/)? "gzip -dc $fn |" : $fn) || die;
+  while (<$fh>) {
+	my @t = split;
+	next if ($t[3] - $t[2] != 1); # not SNP
+	@{$map{$t[4]}} = @t[1,3,7];
+  }
+  close($fh);
+  # write VCF
+  warn("Writing VCF...\n");
+  print "##fileformat=VCFv4.0\n";
+  while (<>) {
+	my @t = split;
+	if ($t[0] eq 'rs#') { # the first line
+	  print join("\t", "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT", @t[11..$#t]), "\n";
+	} else {
+	  next unless ($map{$t[0]});
+	  next if (length($t[1]) != 3); # skip non-SNPs
+	  my $a = \@{$map{$t[0]}};
+	  my $ref = $a->[2];
+	  my @u = split('/', $t[1]);
+	  if ($u[1] eq $ref) {
+		$u[1] = $u[0]; $u[0] = $ref;
+	  } elsif ($u[0] ne $ref) { next; }
+	  my $alt = $u[1];
+	  my %w;
+	  $w{$u[0]} = 0; $w{$u[1]} = 1;
+	  my @s = (@$a[0,1], $t[0], $ref, $alt, 0, '.', '.', 'GT');
+	  my $is_tri = 0;
+	  for (@t[11..$#t]) {
+		if ($_ eq 'NN') {
+		  push(@s, './.');
+		} else {
+		  my @a = ($w{substr($_,0,1)}, $w{substr($_,1,1)});
+		  if (!defined($a[0]) || !defined($a[1])) {
+			$is_tri = 1;
+			last;
+		  }
+		  push(@s, "$a[0]/$a[1]");
+		}
+	  }
+	  next if ($is_tri);
+	  print join("\t", @s), "\n";
+	}
+  }
+}
+
+sub vcf2fq {
+  my %opts = (d=>3, D=>100000, Q=>10, l=>5);
+  getopts('d:D:Q:l:', \%opts);
+  die(qq/
+Usage:   vcfutils.pl vcf2fq [options] <all-site.vcf>
+
+Options: -d INT    minimum depth          [$opts{d}]
+         -D INT    maximum depth          [$opts{D}]
+         -Q INT    min RMS mapQ           [$opts{Q}]
+         -l INT    INDEL filtering window [$opts{l}]
+\n/) if (@ARGV == 0 && -t STDIN);
+
+  my ($last_chr, $seq, $qual, $last_pos, @gaps);
+  my $_Q = $opts{Q};
+  my $_d = $opts{d};
+  my $_D = $opts{D};
+
+  my %het = (AC=>'M', AG=>'R', AT=>'W', CA=>'M', CG=>'S', CT=>'Y',
+			 GA=>'R', GC=>'S', GT=>'K', TA=>'W', TC=>'Y', TG=>'K');
+
+  $last_chr = '';
+  while (<>) {
+	next if (/^#/);
+	my @t = split;
+	if ($last_chr ne $t[0]) {
+	  &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l}) if ($last_chr);
+	  ($last_chr, $last_pos) = ($t[0], 0);
+	  $seq = $qual = '';
+	  @gaps = ();
+	}
+	die("[vcf2fq] unsorted input\n") if ($t[1] - $last_pos < 0);
+	if ($t[1] - $last_pos > 1) {
+	  $seq .= 'n' x ($t[1] - $last_pos - 1);
+	  $qual .= '!' x ($t[1] - $last_pos - 1);
+	}
+	if (length($t[3]) == 1 && $t[7] !~ /INDEL/ && $t[4] =~ /^([A-Za-z.])(,[A-Za-z])*$/) { # a SNP or reference
+	  my ($ref, $alt) = ($t[3], $1);
+	  my ($b, $q);
+	  $q = $1 if ($t[7] =~ /FQ=(-?[\d\.]+)/);
+	  if ($q < 0) {
+		$_ = ($t[7] =~ /AF1=([\d\.]+)/)? $1 : 0;
+		$b = ($_ < .5 || $alt eq '.')? $ref : $alt;
+		$q = -$q;
+	  } else {
+		$b = $het{"$ref$alt"};
+		$b ||= 'N';
+	  }
+	  $b = lc($b);
+	  $b = uc($b) if (($t[7] =~ /MQ=(\d+)/ && $1 >= $_Q) && ($t[7] =~ /DP=(\d+)/ && $1 >= $_d && $1 <= $_D));
+	  $q = int($q + 33 + .499);
+	  $q = chr($q <= 126? $q : 126);
+	  $seq .= $b;
+	  $qual .= $q;
+	} elsif ($t[4] ne '.') { # an INDEL
+	  push(@gaps, [$t[1], length($t[3])]);
+	}
+	$last_pos = $t[1];
+  }
+  &v2q_post_process($last_chr, \$seq, \$qual, \@gaps, $opts{l});
+}
+
+sub v2q_post_process {
+  my ($chr, $seq, $qual, $gaps, $l) = @_;
+  for my $g (@$gaps) {
+	my $beg = $g->[0] > $l? $g->[0] - $l : 0;
+	my $end = $g->[0] + $g->[1] + $l;
+	$end = length($$seq) if ($end > length($$seq));
+	substr($$seq, $beg, $end - $beg) = lc(substr($$seq, $beg, $end - $beg));
+  }
+  print "\@$chr\n"; &v2q_print_str($seq);
+  print "+\n"; &v2q_print_str($qual);
+}
+
+sub v2q_print_str {
+  my ($s) = @_;
+  my $l = length($$s);
+  for (my $i = 0; $i < $l; $i += 60) {
+	print substr($$s, $i, 60), "\n";
+  }
+}
+
+sub usage {
+  die(qq/
+Usage:   vcfutils.pl <command> [<arguments>]\n
+Command: subsam       get a subset of samples
+         listsam      list the samples
+         fillac       fill the allele count field
+         qstats       SNP stats stratified by QUAL
+
+         hapmap2vcf   convert the hapmap format to VCF
+         ucscsnp2vcf  convert UCSC SNP SQL dump to VCF
+
+         varFilter    filtering short variants (*)
+         vcf2fq       VCF->fastq (**)
+
+Notes: Commands with description endting with (*) may need bcftools
+       specific annotations.
+\n/);
+}