Mercurial > repos > siyuan > prada
view pyPRADA_1.2/tools/samtools-0.1.16/misc/wgsim_eval.pl @ 3:f17965495ec9 draft default tip
Uploaded
author | siyuan |
---|---|
date | Tue, 11 Mar 2014 12:14:01 -0400 |
parents | acc2ca1a3ba4 |
children |
line wrap: on
line source
#!/usr/bin/perl -w # Contact: lh3 # Version: 0.1.5 use strict; use warnings; use Getopt::Std; &wgsim_eval; exit; sub wgsim_eval { my %opts = (g=>5); getopts('pcag:', \%opts); die("Usage: wgsim_eval.pl [-pca] [-g $opts{g}] <in.sam>\n") if (@ARGV == 0 && -t STDIN); my (@c0, @c1, %fnfp); my ($max_q, $flag) = (0, 0); my $gap = $opts{g}; $flag |= 1 if (defined $opts{p}); $flag |= 2 if (defined $opts{c}); while (<>) { next if (/^\@/); my @t = split("\t"); next if (@t < 11); my $line = $_; my ($q, $is_correct, $chr, $left, $rght) = (int($t[4]/10), 1, $t[2], $t[3], $t[3]); $max_q = $q if ($q > $max_q); # right coordinate $_ = $t[5]; s/(\d+)[MDN]/$rght+=$1,'x'/eg; --$rght; # correct for soft clipping my ($left0, $rght0) = ($left, $rght); $left -= $1 if (/^(\d+)[SH]/); $rght += $1 if (/(\d+)[SH]$/); $left0 -= $1 if (/(\d+)[SH]$/); $rght0 += $1 if (/^(\d+)[SH]/); # skip unmapped reads next if (($t[1]&0x4) || $chr eq '*'); # parse read name and check if ($t[0] =~ /^(\S+)_(\d+)_(\d+)_/) { if ($1 ne $chr) { # different chr $is_correct = 0; } else { if ($flag & 2) { if (($t[1]&0x40) && !($t[1]&0x10)) { # F3, forward $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap); } elsif (($t[1]&0x40) && ($t[1]&0x10)) { # F3, reverse $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); } elsif (($t[1]&0x80) && !($t[1]&0x10)) { # R3, forward $is_correct = 0 if (abs($3 - $left) > $gap && abs($3 - $left0) > $gap); } else { # R3, reverse $is_correct = 0 if (abs($2 - $rght) > $gap && abs($3 - $rght0) > $gap); } } else { if ($t[1] & 0x10) { # reverse $is_correct = 0 if (abs($3 - $rght) > $gap && abs($3 - $rght0) > $gap); # in case of indels that are close to the end of a reads } else { $is_correct = 0 if (abs($2 - $left) > $gap && abs($2 - $left0) > $gap); } } } } else { warn("[wgsim_eval] read '$t[0]' was not generated by wgsim?\n"); next; } ++$c0[$q]; ++$c1[$q] unless ($is_correct); @{$fnfp{$t[4]}} = (0, 0) unless (defined $fnfp{$t[4]}); ++$fnfp{$t[4]}[0]; ++$fnfp{$t[4]}[1] unless ($is_correct); print STDERR $line if (($flag&1) && !$is_correct && $q > 0); } # print my ($cc0, $cc1) = (0, 0); if (!defined($opts{a})) { for (my $i = $max_q; $i >= 0; --$i) { $c0[$i] = 0 unless (defined $c0[$i]); $c1[$i] = 0 unless (defined $c1[$i]); $cc0 += $c0[$i]; $cc1 += $c1[$i]; printf("%.2dx %12d / %-12d %12d %.3e\n", $i, $c1[$i], $c0[$i], $cc0, $cc1/$cc0) if ($cc0); } } else { for (reverse(sort {$a<=>$b} (keys %fnfp))) { next if ($_ == 0); $cc0 += $fnfp{$_}[0]; $cc1 += $fnfp{$_}[1]; print join("\t", $_, $cc0, $cc1), "\n"; } } }