Mercurial > repos > yusuf > filter_bam_list
comparison filter_bam_by_list @ 0:42af0b971c55 default tip
intial commit
author | Yusuf Ali <ali@yusuf.email> |
---|---|
date | Wed, 25 Mar 2015 13:33:46 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:42af0b971c55 |
---|---|
1 #!/usr/bin/env perl | |
2 | |
3 use strict; | |
4 use warnings; | |
5 use IO::Handle; | |
6 | |
7 # Report lines of a file that have as one of the column values a value from the pattern file | |
8 @ARGV == 6 or die "Usage: $0 <input.bam> <named genomic regions.bed> <file of patterns> <filtered output.bam> <matched regions.bed> <output messages.txt>\n"; | |
9 | |
10 die "Input BAM file $ARGV[0] does not exist\n" if not -e $ARGV[0]; | |
11 die "Input BAM file $ARGV[0] is not readable\n" if not -r $ARGV[0]; | |
12 | |
13 my @alts; | |
14 if(-e $ARGV[2]){ | |
15 open(PATTERNS, $ARGV[2]) | |
16 or die "Cannot open $ARGV[1] for reading: $!\n"; | |
17 while(<PATTERNS>){ | |
18 chomp; | |
19 push @alts, $_; | |
20 } | |
21 close(PATTERNS); | |
22 } | |
23 else{ # else assume the arg is a list of names directly | |
24 @alts = split /\s+/, $ARGV[2]; | |
25 } | |
26 | |
27 my @regions; | |
28 my %seen; | |
29 my $regex = "^(?:".join("|", @alts).")\$"; | |
30 open(TAB, $ARGV[1]) | |
31 or die "Cannot open $ARGV[1] for reading: $!\n"; | |
32 while(<TAB>){ | |
33 chomp; | |
34 my @F = split /\t/, $_; | |
35 next unless @F > 3; | |
36 if($F[3] =~ /$regex/io){ | |
37 next if $seen{"$F[0]:$F[1]-$F[2]"}++; # sometimes regions are repeated, don't send these repeats to samtools | |
38 push @regions, $_; | |
39 } | |
40 } | |
41 close(TAB); | |
42 | |
43 die "No matches to desired names in the provided named genomic regions BED file, aborting filtered BAM file creation" if not @regions; | |
44 | |
45 open(BED, ">$ARGV[4]") | |
46 or die "Cannot open $ARGV[4] for writing: $!\n"; | |
47 print BED join("\n", @regions), "\n"; | |
48 close(BED); | |
49 | |
50 open(ERRFILE, ">$ARGV[5]") or die "Cannot open $ARGV[5] for writing: $!\n"; | |
51 STDOUT->fdopen(\*ERRFILE, "w") or die "Cannot redirect stdout to $ARGV[5]: $!\n"; | |
52 STDERR->fdopen(\*ERRFILE, "w") or die "Cannot redirect stderr to $ARGV[5]: $!\n"; | |
53 system("samtools view -h -L $ARGV[4] $ARGV[0] | samtools view -S -b - > $ARGV[3]") >> 8 | |
54 and die "Samtools failed: exit status ", ($?>>8), "\n"; | |
55 system("samtools index $ARGV[3]"); |