Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/misc/blast2sam.pl @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #!/usr/bin/perl -w | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use Getopt::Std; | |
6 | |
7 &blast2sam; | |
8 | |
9 sub blast2sam { | |
10 my %opts = (); | |
11 getopts('s', \%opts); | |
12 die("Usage: blast2sam.pl <in.blastn>\n") if (-t STDIN && @ARGV == 0); | |
13 my ($qlen, $slen, $q, $s, $qbeg, $qend, @sam, @cigar, @cmaux, $show_seq); | |
14 $show_seq = defined($opts{s}); | |
15 @sam = (); @sam[0,4,6..8,10] = ('', 255, '*', 0, 0, '*'); | |
16 while (<>) { | |
17 if (@cigar && (/^Query=/ || /Score =.*bits.*Expect/)) { # print | |
18 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); | |
19 @cigar = (); | |
20 } | |
21 if (/^Query= (\S+)/) { | |
22 $sam[0] = $1; | |
23 } elsif (/\((\S+)\s+letters\)/) { | |
24 $qlen = $1; $qlen =~ s/,//g; | |
25 } elsif (/^>(\S+)/) { | |
26 $sam[2] = $1; | |
27 } elsif (/Length = (\d+)/) { | |
28 $slen = $1; | |
29 } elsif (/Score =\s+(\S+) bits.+Expect(\(\d+\))? = (\S+)/) { # the start of an alignment block | |
30 my ($as, $ev) = (int($1 + .499), $3); | |
31 $ev = "1$ev" if ($ev =~ /^e/); | |
32 @sam[1,3,9,11,12] = (0, 0, '', "AS:i:$as", "EV:Z:$ev"); | |
33 @cigar = (); $qbeg = 0; | |
34 @cmaux = (0, 0, 0, ''); | |
35 } elsif (/Strand = (\S+) \/ (\S+)/) { | |
36 $sam[1] |= 0x10 if ($2 eq 'Minus'); | |
37 } elsif (/Query\:\s(\d+)\s*(\S+)\s(\d+)/) { | |
38 $q = $2; | |
39 unless ($qbeg) { | |
40 $qbeg = $1; | |
41 push(@cigar, ($1-1) . "H") if ($1 > 1); | |
42 } | |
43 $qend = $3; | |
44 if ($show_seq) { | |
45 my $x = $q; | |
46 $x =~ s/-//g; $sam[9] .= $x; | |
47 } | |
48 } elsif (/Sbjct\:\s(\d+)\s*(\S+)\s(\d+)/) { | |
49 $s = $2; | |
50 if ($sam[1] & 0x10) { | |
51 $sam[3] = $3; | |
52 } else { | |
53 $sam[3] = $1 unless ($sam[3]); | |
54 } | |
55 &aln2cm(\@cigar, \$q, \$s, \@cmaux); | |
56 } | |
57 } | |
58 &blast_print_sam(\@sam, \@cigar, \@cmaux, $qlen - $qend); | |
59 } | |
60 | |
61 sub blast_print_sam { | |
62 my ($sam, $cigar, $cmaux, $qrest) = @_; | |
63 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); | |
64 push(@$cigar, $qrest . 'H') if ($qrest); | |
65 if ($sam->[1] & 0x10) { | |
66 @$cigar = reverse(@$cigar); | |
67 $sam->[9] = reverse($sam->[9]); | |
68 $sam->[9] =~ tr/atgcrymkswATGCRYMKSW/tacgyrkmswTACGYRKMSW/; | |
69 } | |
70 $sam->[9] = '*' if (!$sam->[9]); | |
71 $sam->[5] = join('', @$cigar); | |
72 print join("\t", @$sam), "\n"; | |
73 } | |
74 | |
75 sub aln2cm { | |
76 my ($cigar, $q, $s, $cmaux) = @_; | |
77 my $l = length($$q); | |
78 for (my $i = 0; $i < $l; ++$i) { | |
79 my $op; | |
80 # set $op | |
81 if (substr($$q, $i, 1) eq '-') { $op = 2; } | |
82 elsif (substr($$s, $i, 1) eq '-') { $op = 1; } | |
83 else { $op = 0; } | |
84 # for CIGAR | |
85 if ($cmaux->[0] == $op) { | |
86 ++$cmaux->[1]; | |
87 } else { | |
88 push(@$cigar, $cmaux->[1] . substr("MDI", $cmaux->[0], 1)); | |
89 $cmaux->[0] = $op; $cmaux->[1] = 1; | |
90 } | |
91 } | |
92 } |