view pyPRADA_1.2/tools/samtools-0.1.16/misc/wgsim_eval.pl @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl -w

# Contact: lh3
# Version: 0.1.5

use strict;
use warnings;
use Getopt::Std;

&wgsim_eval;
exit;

sub wgsim_eval {
  my %opts = (g=>5);
  getopts('pcag:', \%opts);
  die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN);
  my (@c0, @c1, %fnfp);
  my ($max_q, $flag) = (0, 0);
  my $gap = $opts{g};
  $flag |= 1 if (defined $opts{p});
  $flag |= 2 if (defined $opts{c});
  while (<>) {
	next if (/^\@/);
	my @t = split("\t");
	next if (@t < 11);
	my $line = $_;
	my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]);
	$max_q = $q if ($q > $max_q);
	# right coordinate
	$_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg;
	--$rght;
	# correct for soft clipping
	my ($left0, $rght0) = ($left, $rght);
	$left -= $1 if (/^(\d+)[SH]/);
	$rght += $1 if (/(\d+)[SH]$/);
	$left0 -= $1 if (/(\d+)[SH]$/);
	$rght0 += $1 if (/^(\d+)[SH]/);
	# skip unmapped reads
	next if (($t[1]&0x4) || $chr eq '*');
	# parse read name and check
	if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) {
	  if ($1 ne $chr) { # different chr
		$is_correct = 0;
	  } else {
		if ($flag & 2) {
		  if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward
			$is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
		  } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse
			$is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap);
		  } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward
			$is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap);
		  } else { # R3, reverse
			$is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap);
		  }
		} else {
		  if ($t[1] & 0x10) { # reverse
			$is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads
		  } else {
			$is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap);
		  }
		}
	  }
	} else {
	  warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n");
	  next;
	}
	++$c0[$q];
	++$c1[$q] unless ($is_correct);
	@{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]});
	++$fnfp{$t[4]}[0];
	++$fnfp{$t[4]}[1] unless ($is_correct);
	print STDERR $line if (($flag&1) && !$is_correct && $q > 0);
  }
  # print
  my ($cc0, $cc1) = (0, 0);
  if (!defined($opts{a})) {
	for (my $i = $max_q; $i >= 0; --$i) {
	  $c0[$i] = 0 unless (defined $c0[$i]);
	  $c1[$i] = 0 unless (defined $c1[$i]);
	  $cc0 += $c0[$i]; $cc1 += $c1[$i];
	  printf("%.2dx %12d / %-12d  %12d  %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0);
	}
  } else {
	for (reverse(sort {$a<=>$b} (keys %fnfp))) {
	  next if ($_ == 0);
	  $cc0 += $fnfp{$_}[0];
	  $cc1 += $fnfp{$_}[1];
	  print join("\t", $_, $cc0, $cc1), "\n";
	}
  }
}