annotate bam2html.pl @ 1:4f6952e0af48 default tip

CREST - add crest.loc.sample
author Jim Johnson <jj@umn.edu>
date Wed, 08 Feb 2012 16:08:01 -0600
parents acc8d8bfeb9a
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
1 #!/usr/bin/perl -w
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
2 use strict;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
3 use Carp;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
4 use Getopt::Long;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
5 use English;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
6 use Pod::Usage;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
7 use Data::Dumper;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
8 use Bio::Assembly::IO;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
9 use Bio::DB::Sam;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
10 use Bio::DB::Sam::Constants;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
11 use File::Spec;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
12 use File::Path;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
13 use File::Copy;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
14 use File::Basename;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
15
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
16 use constant HTML_COLOR => {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
17 'MATCH' => '#000000', #black
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
18 'MISMATCH' => '#800000', #brown
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
19 'LOWQUAL' => '#C0C0C0', #grey
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
20 'INDEL' => '#FF8040', #Orange
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
21 'SCLIP' => '#6960EC', #Slate Blue2
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
22 };
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
23 my $lowqual_cutoff = 20;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
24
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
25 my ($bam_d, $bam_g, $file);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
26 my $ref_genome;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
27 my $range;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
28
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
29 my ( $help, $man, $version, $usage );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
30 my $output;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
31 my $RNASeq;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
32 my $optionOK = GetOptions(
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
33 'd=s' => \$bam_d,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
34 'g=s' => \$bam_g,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
35 'f|i|file|input=s' => \$file,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
36 'r|ref_genome=s' => \$ref_genome,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
37 'range=s' => \$range,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
38 'RNASeq' => \$RNASeq,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
39 'o|output=s' => \$output,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
40 'h|help|?' => \$help,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
41 'man' => \$man,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
42 'usage' => \$usage,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
43 'v|version' => \$version,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
44 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
45
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
46 pod2usage(-verbose=>2) if($man or $usage);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
47 pod2usage(1) if($help or $version );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
48 croak "you must specify at least one bam file" unless ($bam_d || $bam_g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
49 warn "Missing reference genome, the view is not correct" unless($ref_genome);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
50 croak "You need either provide an input file or a range" unless ($file || $range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
51
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
52 my @bams;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
53 push @bams, Bio::DB::Sam->new( -bam => $bam_d, -fasta => $ref_genome) if($bam_d);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
54 push @bams, Bio::DB::Sam->new( -bam => $bam_g, -fasta => $ref_genome) if($bam_g);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
55 open STDOUT, ">$output" if $output;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
56
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
57 if($range) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
58 print STDOUT bam2html($range, @bams);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
59 close STDOUT if $output;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
60 exit(0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
61 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
62
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
63 open my $FILE, "<$file" or croak "can't open $file:$OS_ERROR";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
64 while( my $line = <$FILE>) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
65 chomp $line;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
66 my @fields = split /\t/, $line;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
67 print "<h3 style=\"color:blue;text-align:center\">$line</h3>\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
68 if(scalar(@fields) == 3) { # deal with file: chr start end format
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
69 my ($chr, $s, $e) = @fields;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
70 print STDOUT bam2html("$chr:$s-$e", @bams);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
71 next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
72 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
73 # deal with CREST output file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
74 # if($fields[0] =~ /^chr/) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
75 # $fields[0] = substr($fields[0], 3);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
76 # $fields[2] = substr($fields[2], 3);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
77 # }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
78 my ($chr, $s, $e) = ($fields[0], $fields[1], $fields[1]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
79 print STDOUT bam2html("$chr:$s-$e", @bams);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
80 ($chr, $s, $e) = ($fields[4], $fields[5], $fields[5]);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
81 print STDOUT bam2html("$chr:$s-$e", @bams);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
82 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
83 close STDOUT if($output);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
84 exit(0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
85 #print bam2html("12:11676858-11676858", $bam1, $bam2);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
86
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
87
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
88 sub get_input_bam {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
89 my ($raw_bam_dir, $sample, $group) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
90
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
91 $raw_bam_dir = File::Spec->catdir($raw_bam_dir, $group);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
92 opendir(my $dh, $raw_bam_dir) or croak "can't open directory: $raw_bam_dir: $OS_ERROR";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
93 my @files = grep { /^$sample-.*bam$/ } readdir($dh);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
94 close $dh;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
95 return $files[0];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
96 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
97
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
98
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
99 # this function returns a <pre> </pre> block for the specific contig in the ace file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
100 sub bam2html {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
101 my ($range, @bams) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
102 my ($chr, $start, $end) = $range =~ m/^(.*?):(\d+)-(\d+)/;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
103 if(!$chr or !$start or !$end) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
104 croak "The range format is not correct, use chr:start-end";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
105 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
106 my ($r_start, $r_end) = ($start, $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
107 my $name_len = length($range);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
108 for my $bam (@bams) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
109 my $segment = $bam->segment(-seq_id => $chr, -start => $start, -end => $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
110 my @alignments = $segment->features;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
111 # return "There are too many reads in this region, this is must be a highly repetitive region, abort!"
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
112 # if(scalar(@alignments) > 500);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
113 for my $a(@alignments) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
114 my $l = length($a->query->name);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
115 $name_len = $l if($name_len < $l);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
116 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
117 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
118 if($start == $end) { # we want to check a specific point
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
119 if($RNASeq) { # for RNASeq we just extend a little bit instead of dynamic do it
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
120 $r_start = $start - 100;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
121 $r_end = $end + 100;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
122 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
123 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
124 for my $bam (@bams) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
125 my $segment = $bam->segment(-seq_id => $chr, -start => $start - 1, -end => $end + 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
126 my @alignments = $segment->features;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
127 for my $a (@alignments) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
128 next if($a->start > $end || $a->end < $start);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
129 $r_start = $a->start if $a->start < $r_start;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
130 $r_end = $a->end if $a->end > $r_end;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
131 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
132 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
133 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
134 $range = "$chr:$r_start-$r_end";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
135 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
136 my $rtn_str = "<pre>\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
137 my($ref, $pos2padded) = get_padded_ref($range, @bams);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
138 $rtn_str .= print_bam_ruler($r_start, $r_end, $pos2padded, $name_len) . "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
139 my $line = print_ref($ref, $range, $name_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
140 my $line_len = length($line);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
141 $rtn_str .= $line . "\n" . ' ' x $line_len . "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
142 $ref =~ s/\*//g; #remove * from ref sequence
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
143 for my $bam (@bams) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
144 my $segment = $bam->segment(-seq_id => $chr, -start => $start, -end => $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
145 my @alignments = $segment->features;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
146 for my $a (@alignments) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
147 #next if($a->cigar_str !~ m/S/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
148 if($a->strand == 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
149 my $tmp_str = print_bam_seq($a, $r_start, $r_end, $ref, $pos2padded, $name_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
150 $rtn_str .= $tmp_str . "\n" if($tmp_str);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
151 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
152 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
153 for my $a (@alignments) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
154 #next if($a->cigar_str !~ m/S/);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
155 if($a->strand == -1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
156 my $tmp_str = print_bam_seq($a, $r_start, $r_end, $ref, $pos2padded, $name_len);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
157 $rtn_str .= $tmp_str . "\n" if($tmp_str);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
158 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
159 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
160 $rtn_str .= '_' x $line_len . "\n";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
161 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
162 $rtn_str .= "\n</pre>";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
163 return $rtn_str;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
164 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
165
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
166 # only print the unpadded position, which is meaningful
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
167 sub print_bam_ruler {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
168 my($start, $end, $pos2padded, $name_len) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
169 my ($string, $mark);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
170 $string = $mark = " "x($name_len + 2);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
171
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
172 #print | at 20 and . at 10
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
173 for( my $i = $start; $i <= $end; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
174 $mark .= $i % 10 == 0 ? ($i % 20 == 0 ? "|" : ".") : " ";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
175 last if($i == $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
176 $mark .= ' ' x ($pos2padded->[$i - $start + 2] - $pos2padded->[$i - $start + 1] - 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
177 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
178 # print numbers above | for padded consensus
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
179 my $padded_i = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
180 for( my $i = 1; $i <= $end - $start + 1; $i++ ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
181 my $j = $i + $start - 1;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
182 my $overhead = $pos2padded->[$i] - $padded_i;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
183 if($overhead > 0 ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
184 $string .= ' ' x $overhead;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
185 $padded_i += $overhead;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
186 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
187 if( $j % 20 == 0) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
188 my $l = length($j);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
189 my $half_l = int(($l+1)/2);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
190 $string = substr($string, 0, length($string) - $half_l );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
191 $string .= $j;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
192 $padded_i += ($l - $half_l);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
193 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
194 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
195 return join("\n", ($string, $mark));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
196 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
197
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
198 sub print_ref {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
199 my( $ref, $chr, $name_len) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
200 return $chr . " " x ($name_len + 2 - length($chr)) . $ref;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
201 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
202
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
203 sub get_padded_ref {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
204 my ($range, @bams) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
205 my $ref_str; #padded ref genome
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
206 my @pos2padded;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
207 my($chr, $start, $end) = $range =~ m/(.*?):(\d+)-(\d+)/;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
208
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
209 @pos2padded = 0 .. ($end - $start + 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
210 push @pos2padded, 100 + $end - $start;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
211
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
212 foreach my $bam (@bams) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
213 my ($d, $cum_pad) = (0, 0);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
214 my $padded_fun = sub {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
215 my ($seqid, $pos, $p) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
216 return if($pos < $start || $pos > $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
217 $d++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
218 $pos2padded[$d] += $cum_pad;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
219 my $max_ins = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
220 for my $pileup (@$p) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
221 $max_ins = $pileup->indel if($max_ins < $pileup->indel);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
222 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
223 if($max_ins > $pos2padded[$d+1] + $cum_pad - $pos2padded[$d] - 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
224 $cum_pad += $max_ins - ($pos2padded[$d+1] + $cum_pad- $pos2padded[$d] - 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
225 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
226 };
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
227 $bam->pileup($range, $padded_fun);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
228 if($d <= $end - $start + 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
229 for(my $i = $d + 1; $i <= $end-$start+1; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
230 $pos2padded[$i] += $cum_pad;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
231 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
232 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
233 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
234 my $refseq = $bams[0]->segment($chr, $start, $end)->dna;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
235 my @str = split //, "*" x $pos2padded[$end - $start + 1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
236 for my $i ( 1 .. ($end - $start + 1)) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
237 $str[$pos2padded[$i]] = substr($refseq, $i-1, 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
238 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
239 shift @str; #remove the 0th base
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
240 return (join( '', @str), \@pos2padded );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
241 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
242
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
243 sub print_bam_seq { # it's complicated
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
244 my ($align, $start, $end, $ref, $pos2padded, $name_len) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
245 my $rtn = $align->query->name;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
246 $rtn .= ' ' x ($name_len - length($rtn));
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
247 $rtn .= $align->strand == -1 ? '- ' : '+ ';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
248 my $p_s = $align->start;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
249 my $p_e = $align->end;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
250 my $seq = $align->query->dna;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
251 my @qual = $align->qscore;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
252 my $repeat = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
253 if($align->has_tag("XT")) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
254 $repeat = 1 if($align->aux_get("XT") ne "U");
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
255 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
256
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
257 my $r_cigar = $align->cigar_array;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
258 my ($s, $e) = ($align->query->start, $align->query->end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
259
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
260 # reads partial in the region
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
261 my $leading = "";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
262 if($p_s < $start) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
263 ($r_cigar, $p_s, $s) = find_new_start($r_cigar, $p_s, $s, $start);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
264 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
265
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
266 my $tailing = "";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
267 if($p_e > $end) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
268 ($r_cigar, $p_e, $e) = find_new_end($r_cigar, $p_e, $e, $end);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
269 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
270 return if($s >= $e);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
271 my @cigar = @{$r_cigar};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
272
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
273 # deal with leading softclip
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
274 my $op = shift @cigar;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
275 my $l = $pos2padded->[$p_s - $start + 1] - $pos2padded->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
276 if($op->[0] eq 'S') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
277 my $ss = $op->[1] - $l;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
278 if($ss < 0) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
279 $ss = -$ss;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
280 $leading .= ' ' x $ss;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
281 $ss = 0;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
282 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
283 $leading .= '<font color="' . HTML_COLOR->{'SCLIP'} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
284 for( my $i = $ss; $i < $op->[1]; $i++) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
285 my $c = substr($seq, $i, 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
286 $c = lc $c if($qual[$i] < $lowqual_cutoff);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
287 $leading .= $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
288 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
289 $leading .= '</font>' ;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
290 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
291 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
292 $leading .= ' ' x $l;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
293 unshift @cigar, $op;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
294 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
295
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
296 #dealing with tailing softclip
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
297 $op = pop @cigar;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
298 $l = $pos2padded->[$end - $start + 1] - $pos2padded->[$p_e - $start + 1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
299 if($op->[0] eq 'S') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
300 my $ee = $op->[1] - $l;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
301 $ee = 0 if($ee < 0 );
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
302 $tailing = '<font color="' . HTML_COLOR->{'SCLIP'} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
303 for( my $i = 0; $i < $op->[1] - $ee; $i++){
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
304 my $c = substr($seq, $i + $e, 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
305 $c = lc $c if($qual[$i + $e] < $lowqual_cutoff);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
306 $tailing .= $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
307 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
308 $tailing .= '</font>';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
309 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
310 else{
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
311 push @cigar, $op;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
312 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
313
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
314 # generate the alignment part, only M, I, D and N
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
315 my $mid = '';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
316 my $mode;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
317 foreach $op (@cigar) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
318 my $l = $op->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
319 if($op->[0] eq 'M') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
320 my $line = '';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
321 while($l > 0 ){
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
322 $l--;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
323 my $c = substr($seq, $s - 1, 1); #seq is 0 based
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
324 my $newmode = "MISMATCH";
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
325 my $cc = substr($ref, $p_s - $start, 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
326 #last unless ($c && $cc);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
327 $newmode = "MATCH" if($cc eq $c);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
328 if($qual[$s-1] < $lowqual_cutoff) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
329 $c = lc $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
330 $newmode = 'LOWQUAL' if($newmode eq 'MATCH');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
331 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
332 if(!$mode) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
333 $line .= '<font color="' . HTML_COLOR->{$newmode} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
334 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
335 elsif($mode ne $newmode) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
336 $line .= '</font>' . '<font color="' . HTML_COLOR->{$newmode} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
337 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
338 $mode = $newmode;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
339 $line .= $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
340 $s++; $p_s++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
341 # dealing with padded * in reference genome
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
342 if($p_s < $p_e && $s < $e && $l > 0) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
343 my $tmp = $pos2padded->[$p_s - $start + 1]-$pos2padded->[$p_s-1 - $start + 1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
344 if( $tmp > 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
345 $line .= '</font>';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
346 $line .= '<font color="' . HTML_COLOR->{'INDEL'} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
347 $line .= '*' x ($tmp - 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
348 $mode = 'INDEL';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
349 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
350 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
351 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
352 $mid .= $line;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
353 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
354 if($op->[0] eq 'D' || $op->[0] eq 'I' || $op->[0] eq 'N') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
355 my $newmode = 'INDEL';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
356 my $tmp; #extra padded * after indel
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
357 $mid .= '</font>' if($mode && $mode ne $newmode);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
358 $mid .= '<font color="' . HTML_COLOR->{'INDEL'} . '">';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
359
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
360 if($op->[0] eq 'D' || $op->[0] eq 'N') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
361 $mid .= ($op->[0] eq 'D' ? '*' : '=') x $l;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
362 $p_s += $l;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
363 $tmp = $pos2padded->[$p_s - $start + 1]-$pos2padded->[$p_s - $l - $start ] - $l if($p_s < $p_e);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
364 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
365 else{
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
366 $tmp = $pos2padded->[$p_s - $start + 1] - $pos2padded->[$p_s - 1 - $start + 1] - $l if($p_s < $p_e);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
367 while($l > 0 ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
368 my $c = substr($seq, $s - 1, 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
369 $c = lc $c if($qual[$s - 1] < $lowqual_cutoff);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
370 $mid .= $c;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
371 $l--; $s++;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
372 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
373 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
374 $mode = $newmode;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
375 if($p_s < $p_e && $tmp > 1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
376 $mid .= '*' x ($tmp - 1);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
377 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
378 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
379 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
380 $mid .= '</font>';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
381 $rtn .= $leading . $mid . $tailing;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
382 $rtn = '<b><i>' . $rtn . '</i></b>' if($repeat);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
383 return $rtn;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
384 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
385
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
386 sub find_new_start {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
387 my ($r_cigar, $p_s, $s, $start) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
388 my @cigar = @{$r_cigar};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
389
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
390 while(1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
391 my $op = shift @cigar;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
392 next if( $op->[0] eq 'S' || $op->[0] eq 'H');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
393 if( $op->[0] eq 'I') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
394 $s += $op->[1]; next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
395 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
396 if( $p_s + $op->[1] < $start ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
397 $p_s += $op->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
398 $s += $op->[1] if $op->[0] eq 'M';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
399 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
400 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
401 $s += ($start - $p_s) if $op->[0] eq 'M';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
402 unshift @cigar, [$op->[0], $op->[1] - ($start - $p_s)];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
403 $p_s = $start;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
404 return (\@cigar, $p_s, $s);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
405 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
406 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
407 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
408
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
409 sub find_new_end {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
410 my ($r_cigar, $p_e, $e, $end) = @_;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
411 my @cigar = @{$r_cigar};
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
412
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
413 while(1) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
414 my $op = pop @cigar;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
415 next if( $op->[0] eq 'S' || $op->[0] eq 'H');
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
416 if( $op->[0] eq 'I') {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
417 $e -= $op->[1]; next;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
418 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
419 if( $p_e - $op->[1] > $end ) {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
420 $p_e -= $op->[1];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
421 $e -= $op->[1] if $op->[0] eq 'M';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
422 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
423 else {
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
424 $e -= ($p_e - $end) if $op->[0] eq 'M';
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
425 push @cigar, [$op->[0], $op->[1] - ($p_e - $end)];
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
426 $p_e = $end;
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
427 return (\@cigar, $p_e, $e);
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
428 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
429 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
430 }
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
431
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
432 =head1 NAME
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
433
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
434 bam2html.pl - a bam file viewer that just simple display part of the alignment as HTML file.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
435
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
436
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
437 =head1 VERSION
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
438
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
439 This documentation refers to bam2html.pl version 0.0.1.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
440
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
441
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
442 =head1 USAGE
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
443
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
444 Display part of a bam file:
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
445 bam2html.pl -r hg18.fa -d diag.bam --range 1:123566-123766 -o diag.html
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
446 Display part of two bam files, one diagnositc, one germlie for comparison.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
447 bam2html.pl -r hg18.fa -d diag.bam -g germline.bam --range 1:123566-123766 -o diag.html
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
448 Display part of two bam files, one diagnositc, one germlie for comparison from
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
449 a list of positions in a file, each line should be tab sepearted as: chr, start, and end.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
450 bam2html.pl -r hg18.fa -d diag.bam -g germline.bam -o diag.html -f position.txt
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
451 Display part of two bam files, one diagnositc, one germlie for comparison from a file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
452 generated by CREST.pl.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
453 bam2html.pl -r hg18.fa -d diag.bam -g germline.bam -o predSV.html -f predSV.txt
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
454
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
455 =head1 REQUIRED ARGUMENTS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
456
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
457 To run the program, several parameter must specified.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
458 -d: The input (diagnositic) bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
459 -g: The input (germ line) bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
460 -r, --ref_genome: The reference genome file in fa format
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
461
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
462 =head1 OPTIONS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
463
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
464 The options that can be used for the program.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
465 -o: The output file, default to STDOUT if missing.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
466 --range: The range where SV will be detected, using chr1:100-200 format.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
467 -f, -i: The input file from either CREST.pl or tab seperated chr, start, end.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
468
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
469 -h, --help Help information
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
470 --man Man page.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
471 --usage Usage information.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
472 --version Software version.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
473
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
474
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
475 =head1 DESCRIPTION
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
476
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
477 This is a bam file viewer that just simple display part of the alignment as
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
478 HTML file. The program is developed to view Structure Varitions around break
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
479 point. So manual review will a breeze. Any way this program can be used
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
480 otherway, but don't put a too big range to display as the view will not
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
481 be pretty as each read will occupy a line.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
482
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
483
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
484 =head1 DIAGNOSTICS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
485
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
486 If the program does not respond for minutes, please be a little bit patient as
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
487 sometimes generating the html file could take longer time.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
488 If you provide a range as "chr1:50-100" and nothing was output, please make sure
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
489 the reference genomes for mapping and display are exact the same and the chrom
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
490 name is chr1 not 1.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
491
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
492
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
493 =head1 DEPENDENCIES
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
494
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
495 The program depend on several packages:
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
496 1. Bioperl perl module.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
497 2. Bio::DB::Sam, version 1.5 or later, it requires samtools lib installed.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
498
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
499
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
500 =head1 BUGS AND LIMITATIONS
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
501
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
502 There are no known bugs in this module, but the method is limitted to bam file
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
503 that has soft-clipping cigar string generated.Please report problems to
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
504 Jianmin Wang (Jianmin.Wang@stjude.org)
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
505 Patches are welcome.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
506
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
507 =head1 AUTHOR
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
508
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
509 Jianmin Wang (Jianmin.Wang@stjude.org)
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
510
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
511
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
512
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
513 =head1 LICENCE AND COPYRIGHT
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
514
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
515 Copyright (c) 2010 by St. Jude Children's Research Hospital.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
516
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
517 This program is free software: you can redistribute it and/or modify
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
518 it under the terms of the GNU General Public License as published by
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
519 the Free Software Foundation, either version 2 of the License, or
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
520 (at your option) any later version.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
521
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
522 This program is distributed in the hope that it will be useful,
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
523 but WITHOUT ANY WARRANTY; without even the implied warranty of
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
524 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
525 GNU General Public License for more details.
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
526
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
527 You should have received a copy of the GNU General Public License
acc8d8bfeb9a Uploaded
jjohnson
parents:
diff changeset
528 along with this program. If not, see <http://www.gnu.org/licenses/>.