annotate blast_parser.pl @ 0:87eda806422d draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
author earlhaminst
date Mon, 12 Dec 2016 07:13:57 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
1 #!/usr/bin/perl
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
2 use strict;
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
3 use warnings;
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
4 use List::Util qw(min max);
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
5
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
6 # A simple Perl parser to convert a BLAST 12-column or 24-column output into a
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
7 # 3-column input for hcluster_sg (id1, id2, weight):
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
8 # blast_parser.pl <file>
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
9
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
10 use constant LOG_E_10 => log(10);
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
11
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
# Main script body: read the BLAST tabular file named on the command line and
# print one "id1<TAB>id2<TAB>weight" line per usable, non-self hit.
my $file1 = $ARGV[0];
defined $file1
    or die "Usage: $0 <blast_tabular_file>\n";
open my $fh1, '<', $file1
    or die "Cannot open '$file1' for reading: $!\n";

while (my $line = <$fh1>) {
    # Strip the line ending (LF or CRLF) so the last column is clean.
    $line =~ s/\r?\n\z//;

    # Skip blank lines and '#' comment lines; neither describes a hit.
    next if $line =~ /\A\s*(?:#|\z)/;

    my @row = split(/\t/, $line);

    # The e-value is read from the 11th column; a shorter row cannot be
    # weighted, and would otherwise be reported with a bogus weight of 100.
    if (@row < 11) {
        warn "Skipping line $. of '$file1': fewer than 11 columns\n";
        next;
    }
    my ($query, $subject, $evalue) = @row[0, 1, 10];

    # ignore self matching hits
    next if $query eq $subject;

    # A non-numeric e-value (e.g. a column-header row) compares equal to 0
    # and would be given weight 100; a negative one would make log() die.
    if ($evalue !~ /\A\s*\+?(?:\d+\.?\d*|\.\d+)(?:[eE][+-]?\d+)?\s*\z/) {
        warn "Skipping line $. of '$file1': invalid e-value '$evalue'\n";
        next;
    }

    # Convert evalue to an integer weight in the range 0..100.
    # If the evalue is 0, leave the weight at 100.
    my $weight = 100;
    if ($evalue != 0) {
        # E-values above 1 have a negative -log10; clamp those to 0 so the
        # weight is never negative.
        $weight = max(0, min(100, positive_round(-1 * log10($evalue))));
    }
    print "$query\t$subject\t$weight\n";
}
close $fh1;
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
32
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
# Return the base-10 logarithm of the given number, computed as its natural
# logarithm divided by the LOG_E_10 constant.
sub log10 {
    my ($value) = @_;
    my $natural_log = log($value);
    return $natural_log / LOG_E_10;
}
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
38
87eda806422d planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff changeset
# Round a float to the nearest integer, with halves rounded up
# (equivalent to floor($n + 0.5)).
#
# Results for non-negative input are the same as int($n + 0.5). Negative
# input is also rounded to the nearest integer, instead of being truncated
# toward zero.
sub positive_round {
    my $n = shift;
    my $shifted = $n + 0.5;
    my $rounded = int($shifted);

    # int() truncates toward zero, which rounds a negative non-integer the
    # wrong way (up); step down by one to get the floor.
    $rounded -= 1 if $rounded > $shifted;

    return $rounded;
}