#!/usr/bin/perl
#V1.1.0 manage empty files
#V1.0.1 added log, option parameters
#
# Recovered from the Mercurial annotate view of
#   rapsodyn/filtersam_mapped_and_unique.pl @ 15:56d328bce3a7
#   (author mcharles, Thu, 29 Jan 2015 08:54:06 -0500).
#
# Purpose:
#   Filter a SAM file, keeping only alignment records that
#     * carry the tags XT:A:U, X0:i:1 and X1:i:0 (BWA-style tags; presumably
#       "unique hit, one best hit, no suboptimal hit" -- see the BWA manual),
#     * and whose FLAG is one of 83, 163, 147 or 99.
#   @SQ and @PG header lines are copied through unchanged.
#   Kept lines are written to STDOUT; per-FLAG counts before and after
#   filtering are written to the log file.
#
# Options:
#   -input_sam_file  FILE   SAM file to read (required)
#   -log_file        FILE   statistics log to (over)write (required)
#   -output_sam_file FILE   accepted but not used, see the NOTE below
#
# NOTE(review): -output_sam_file is parsed but nothing is ever written to it;
# the filtered SAM goes to STDOUT. Callers presumably redirect STDOUT, so this
# is left as is -- confirm against the tool wrapper before changing it.
#
# NOTE(review): header lines other than @SQ/@PG (e.g. @HD, @RG, @CO) are
# dropped from the output, exactly as in previous versions -- confirm that
# this is intended.
use strict;
use warnings;
use Getopt::Long;

my $input_sam_file;
my $output_sam_file;
my $log_file;
my $empty_file = 1;    # stays 1 until at least one record passes the filter

my %bitscore_all;         # FLAG => number of records seen
my %bitscore_selected;    # FLAG => number of records kept

# FLAG values accepted by the filter. Per the SAM FLAG bit definitions:
#   99 = 1+2+32+64, 147 = 1+2+16+128, 83 = 1+2+16+64, 163 = 1+2+32+128
my %kept_flag = map { $_ => 1 } ( 83, 163, 147, 99 );

GetOptions(
    "input_sam_file=s"  => \$input_sam_file,
    "output_sam_file=s" => \$output_sam_file,
    "log_file=s"        => \$log_file
) or die("Error in command line arguments\n");

# Fail early with a clear message instead of an "uninitialized value"
# warning followed by a cryptic open() failure.
die("Missing required option -input_sam_file\n") unless defined $input_sam_file;
die("Missing required option -log_file\n")       unless defined $log_file;

open( my $in, '<', $input_sam_file )
    or die("Can't open $input_sam_file: $!\n");

while ( my $line = <$in> ) {

    # Header conservation
    if ( $line =~ /^\@(?:SQ|PG)/ ) {
        print $line;
        next;
    }

    # Any other '@' line is a header, not an alignment record: it must not
    # be counted in the FLAG statistics.
    next if $line =~ /^\@/;

    # Optional flag verification
    my @fields = split( /\s+/, $line );
    my $bit    = $fields[1];
    next unless defined $bit;    # blank or truncated line: nothing to count

    $bitscore_all{$bit}++;

    next unless is_unique_hit($line);

    # "+ 0" keeps the numeric comparison semantics of the former == tests.
    next unless $bit =~ /^\d+$/ && $kept_flag{ $bit + 0 };

    print $line;
    $empty_file = 0;
    $bitscore_selected{$bit}++;
}

close($in);

open( my $log, '>', $log_file ) or die("Can't open $log_file: $!\n");

# When nothing was selected the log file is created but left empty.
if ( $empty_file == 0 ) {
    print {$log} "\n####\t Sam filtering \n";
    print {$log} "## Before filtering\n";
    print_counts( $log, \%bitscore_all );
    print {$log} "## After filtering\n";
    print_counts( $log, \%bitscore_selected );
}

# Buffered write errors only surface at close time.
close($log) or die("Can't close $log_file: $!\n");

# Return true when the line carries the three tags XT:A:U, X0:i:1 and X1:i:0.
# Each tag must be a complete whitespace-delimited token, so "X0:i:10" is not
# mistaken for "X0:i:1", and a tag ending the last line of a file that has no
# trailing newline is still recognised.
sub is_unique_hit {
    my ($line) = @_;
    return $line =~ /(?<!\S)XT:A:U(?!\S)/
        && $line =~ /(?<!\S)X0:i:1(?!\S)/
        && $line =~ /(?<!\S)X1:i:0(?!\S)/;
}

# Write a two-row table (FLAG values, then their counts) to the handle $fh.
# $count_of is a reference to a FLAG => count hash. Columns are ordered by
# decreasing count; the keys are sorted once, with a tie-break on the FLAG
# text, so both rows are guaranteed to line up and the output is stable.
sub print_counts {
    my ( $fh, $count_of ) = @_;

    my @flags =
        sort { $count_of->{$b} <=> $count_of->{$a} or $a cmp $b }
        keys %{$count_of};

    print {$fh} "bitscore\t:\t";
    print {$fh} $_, "\t*\t" for @flags;
    print {$fh} "\n number \t:\t";
    print {$fh} $count_of->{$_}, "\t*\t" for @flags;
    print {$fh} "\n";
    return;
}