annotate pyPRADA_1.2/tools/samtools-0.1.16/misc/blast2sam.pl @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1 #!/usr/bin/perl -w
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 use strict;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 use warnings;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 use Getopt::Std;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 &blast2sam;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 sub blast2sam {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 my %opts = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 getopts('s', \%opts);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12 die("Usage: blast2sam.pl <in.blastn>\n") if (-t STDIN && @ARGV == 0);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 my ($qlen, $slen, $q, $s, $qbeg, $qend, @sam, @cigar, @cmaux, $show_seq);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 $show_seq = defined($opts{s});
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 @sam = (); @sam[0,4,6..8,10] = ('', 255, '*', 0, 0, '*');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 while (<>) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 if (@cigar && (/^Query=/ || /Score =.*bits.*Expect/)) { # print
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 @cigar = ();
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 if (/^Query= (\S+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 $sam[0] = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 } elsif (/\((\S+)\s+letters\)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 $qlen = $1; $qlen =~ s/,//g;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 } elsif (/^>(\S+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26 $sam[2] = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 } elsif (/Length = (\d+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 $slen = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 } elsif (/Score =\s+(\S+) bits.+Expect(\(\d+\))? = (\S+)/) { # the start of an alignment block
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 my ($as, $ev) = (int($1 + .499), $3);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 $ev = "1$ev" if ($ev =~ /^e/);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 @sam[1,3,9,11,12] = (0, 0, '', "AS:i:$as", "EV:Z:$ev");
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33 @cigar = (); $qbeg = 0;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 @cmaux = (0, 0, 0, '');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 } elsif (/Strand = (\S+) \/ (\S+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 $sam[1] |= 0x10 if ($2 eq 'Minus');
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 } elsif (/Query\:\s(\d+)\s*(\S+)\s(\d+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 $q = $2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 unless ($qbeg) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 $qbeg = $1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 push(@cigar, ($1-1) . "H") if ($1 > 1);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 $qend = $3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 if ($show_seq) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 my $x = $q;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 $x =~ s/-//g; $sam[9] .= $x;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 } elsif (/Sbjct\:\s(\d+)\s*(\S+)\s(\d+)/) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 $s = $2;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 if ($sam[1] & 0x10) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 $sam[3] = $3;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 $sam[3] = $1 unless ($sam[3]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 &aln2cm(\@cigar, \$q, \$s, \@cmaux);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 sub blast_print_sam {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 my ($sam, $cigar, $cmaux, $qrest) = @_;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 push(@$cigar, $qrest . 'H') if ($qrest);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 if ($sam->[1] & 0x10) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 @$cigar = reverse(@$cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 $sam->[9] = reverse($sam->[9]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 $sam->[9] =~ tr/atgcrymkswATGCRYMKSW/tacgyrkmswTACGYRKMSW/;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 $sam->[9] = '*' if (!$sam->[9]);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 $sam->[5] = join('', @$cigar);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 print join("\t", @$sam), "\n";
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 sub aln2cm {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 my ($cigar, $q, $s, $cmaux) = @_;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 my $l = length($$q);
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 for (my $i = 0; $i < $l; ++$i) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 my $op;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 # set $op
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 if (substr($$q, $i, 1) eq '-') { $op = 2; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 elsif (substr($$s, $i, 1) eq '-') { $op = 1; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 else { $op = 0; }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 # for CIGAR
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 if ($cmaux->[0] == $op) {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 ++$cmaux->[1];
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 } else {
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1));
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 $cmaux->[0] = $op; $cmaux->[1] = 1;
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 }
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 }