comparison add_names_to_bed @ 0:7cdd13ff182a default tip

initial commit
author Yusuf Ali <ali@yusuf.email>
date Wed, 25 Mar 2015 15:49:28 -0600
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7cdd13ff182a
1 #!/usr/bin/env perl
2
3 use strict;
4 use warnings;
5 use Set::IntervalTree;
6
7 # This script adds names to the first BED file regions, based on the names for regions provided
8 # by the second BED file. The output BED file is the same as the first BED file, but with names added where possible.
9 @ARGV == 4 or die "Usage: $0 <unnamed input regions.bed> <named input regions.bed> <renamed output.bed> <output log.txt>\n";
10
11 my %names;
12 open(NAMED_BED, $ARGV[1])
13 or die "Cannot open $ARGV[1] for reading: $!\n";
14 while(<NAMED_BED>){
15 my @F = split /\t/, $_;
16 next unless @F > 3;
17 $names{$F[0]} = Set::IntervalTree->new() if not exists $names{$F[0]};
18 $names{$F[0]}->insert($F[3], $F[1], $F[2]);
19 }
20 close(NAMED_BED);
21 die "The BED file $ARGV[1] did not contain any named regions, aborting renaming procedure\n" if not keys %names;
22
23 open(INPUT_BED, $ARGV[0])
24 or die "Cannot open $ARGV[0] for reading: $!\n";
25 open(OUTPUT_BED, ">$ARGV[2]")
26 or die "Cannot open $ARGV[2] for writing: $!\n";
27 my $num_datalines = 0;
28 my $num_changes = 0;
29 while(<INPUT_BED>){
30 chomp;
31 my @F = split /\t/, $_;
32 if(@F < 3){
33 # not a data line
34 print OUTPUT_BED $_, "\n";
35 next;
36 }
37 $num_datalines++;
38 my $chr = $F[0];
39 if(not exists $names{$chr}){ # must be a contig that was in the named BED file
40 if(exists $names{"chr$chr"}){
41 $chr = "chr$chr";
42 }
43 elsif($chr =~ /^chr(\S+)/ and exists $names{$1}){
44 $chr = $1;
45 }
46 else{
47 next;
48 }
49 }
50 my $renames = $names{$chr}->fetch($F[1], $F[2]+1);
51 if(not @$renames){
52 print OUTPUT_BED $_,"\n"; # no mod
53 }
54 else{
55 $num_changes++;
56 my %h;
57 my @renames = grep {not $h{$_}++} @$renames; # just get unique set of names
58 print OUTPUT_BED join("\t", @F[0..2], join("/", @renames), @F[4..$#F]), "\n"; # with new name
59 }
60 }
61 close(OUTPUT_BED);
62 close(INPUT_BED);
63
64 if(open(LOG, ">>$ARGV[3]")){
65 print LOG "Changed $num_changes/$num_datalines lines\n";
66 }
67 else{
68 print "Could not append to log file $ARGV[3]: writing to stdout instead\nChanged $num_changes/$num_datalines lines\n";
69 }
70