annotate SplitOnRg.pl @ 1:76aff7bca1b2 draft

Uploaded
author geert-vandeweyer
date Tue, 18 Feb 2014 05:06:20 -0500
parents afa51021226c
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
afa51021226c Uploaded
geert-vandeweyer
parents:
diff changeset
#!/usr/bin/perl
#
# SplitOnRg.pl - split a BAM file into one BAM per read group (RG tag) using
# `bamtools split`, rename each piece to
#     primary_<O>_<sanitised read group>_visible_bam_<k>
# and write a short report listing every produced file and its read group.
#
# Options (all required):
#   -i  input BAM, passed to `bamtools split -in`; a relative path is resolved
#       against the -p directory because the split runs from there
#   -o  report file to write; opened after changing into -p
#   -O  identifier embedded in the output names (presumably a numeric dataset
#       id: the original renaming logic only recognised digits here)
#   -k  suffix appended after "_visible_bam_" (presumably the genome build /
#       dbkey -- TODO confirm against the calling tool wrapper)
#   -p  working directory in which the split files are created and renamed
#
# NOTE(review): the primary_<id>_<name>_visible_<ext>_<dbkey> naming looks like
# Galaxy's multiple-output discovery convention -- confirm with the tool XML.

use strict;
use warnings;
use Getopt::Std;

my %opts;
getopts('i:o:O:k:p:', \%opts)
    or die "Usage: $0 -i <in.bam> -o <report> -O <id> -k <key> -p <work dir>\n";
for my $flag (qw(i o O k p)) {
    die "Missing required option -$flag\n" unless defined $opts{$flag};
}

# Work inside the target directory from the start. A failed chdir must abort:
# otherwise files in whatever directory we happen to be in would be renamed.
chdir $opts{p}
    or die "Cannot change directory to '$opts{p}': $!\n";

# split
# List-form system() bypasses the shell, so paths with spaces or shell
# metacharacters are passed through verbatim. The stub is given relative to
# the current directory (already -p), which also works when -p is relative.
my $prefix    = "primary_$opts{O}_";
my @split_cmd = (
    'bamtools', 'split',
    '-in',        $opts{i},
    '-tag',       'RG',
    '-tagPrefix', $prefix,
    '-stub',      'data',
);
my $status = system(@split_cmd);
die "Could not run bamtools: $!\n" if $status == -1;
die "bamtools split failed (wait status $status)\n" if $status != 0;

# rename to correct format.
# Only files that really look like data.primary_<O>_RG_<rg>.bam are picked up;
# anything else (index files, leftovers) is ignored instead of being renamed
# using stale regex captures. Sorted so the report numbering is deterministic.
my $split_re = qr/\Adata\.primary_\Q$opts{O}\E_RG_(.*)\.bam\z/s;
opendir my $dir, '.'
    or die "Cannot read directory '$opts{p}': $!\n";
my @split_files = sort grep { $_ =~ $split_re } readdir $dir;
closedir $dir;
warn "No per-read-group BAM files were produced by bamtools\n"
    unless @split_files;

open my $out, '>', $opts{o}
    or die "Cannot write report '$opts{o}': $!\n";

my $idx = 0;
my %source_of;    # target name => split file it came from
for my $from (@split_files) {
    my ($rg) = $from =~ $split_re;
    my $to = target_name($opts{O}, $rg, $opts{k});

    # Different read groups can collapse to the same sanitised name
    # (e.g. "a.b" and "ab"); refuse to silently overwrite one with the other.
    die "Read groups in '$source_of{$to}' and '$from' both map to '$to'\n"
        if exists $source_of{$to};
    $source_of{$to} = $from;

    rename $from, $to
        or die "Cannot rename '$from' to '$to': $!\n";

    $idx++;
    print {$out} "File $idx : $to : ReadGroup: $rg\n";
}

# Buffered write errors only surface at close, so check it.
close $out
    or die "Cannot close report '$opts{o}': $!\n";
exit;

# Build the final file name for one read group: the "data." stub, the "RG_"
# tag marker and the ".bam" extension are dropped, and '.', '-' and '_' are
# removed from the read-group name so it cannot be confused with the
# underscore-separated fields around it.
sub target_name {
    my ($id, $read_group, $key) = @_;
    (my $clean = $read_group) =~ tr/-._//d;
    return "primary_${id}_${clean}_visible_bam_${key}";
}
afa51021226c Uploaded
geert-vandeweyer
parents:
diff changeset
36
afa51021226c Uploaded
geert-vandeweyer
parents:
diff changeset
37