Mercurial > repos > earlhaminst > blast_parser
annotate blast_parser.pl @ 2:376ed15e0d27 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit f5c5f3d6ce676937f5c673ec7fc0631a9f490dc2
author | earlhaminst |
---|---|
date | Fri, 24 Mar 2017 12:14:47 -0400 |
parents | 87eda806422d |
children |
rev | line source |
---|---|
0
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
1 #!/usr/bin/perl |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
2 use strict; |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
3 use warnings; |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
4 use List::Util qw(min max); |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
5 |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
6 # A simple Perl parser to convert a BLAST 12-column or 24-column output into a |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
7 # 3-column input for hcluster_hg (id1, id2, weight): |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
8 # parse_blast.pl <file> |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
9 |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
10 use constant LOG_E_10 => log(10); |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
11 |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
# Main program: read the tab-separated BLAST table named by the first
# command-line argument and print "id1<TAB>id2<TAB>weight" on standard output
# for every hit whose first two columns (the two ids) differ.
my $file1 = $ARGV[0];
defined $file1
    or die "Usage: $0 <file>\n";

# Fail loudly if the input cannot be read instead of silently producing
# empty output.
open my $fh1, '<', $file1
    or die "Cannot open '$file1' for reading: $!\n";

while (my $line = <$fh1>) {
    chomp $line;
    my @row = split(/\t/, $line);

    # The e-value is read from column 11 (index 10); a shorter row cannot be
    # weighted, so report it and move on.
    if (@row < 11) {
        warn "Skipping line $. of '$file1': fewer than 11 tab-separated columns\n";
        next;
    }

    # ignore self matching hits
    next if $row[0] eq $row[1];

    # Convert evalue to an integer weight in the range 0..100.
    my $weight = 100;

    # If the evalue is 0, leave weight at 100 (log of 0 cannot be taken).
    if ($row[10] != 0) {
        # -log10(evalue) is negative whenever the evalue exceeds 1, and
        # positive_round is only meant for positive input, so clamp the
        # result at 0 as well as at 100. The literal 0 is passed first so
        # that it is the value returned when the rounded number compares
        # equal to zero.
        $weight = max(0, min(100, positive_round(-1 * log10($row[10]))));
    }
    print "$row[0]\t$row[1]\t$weight\n";
}
close $fh1;
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
32 |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
# Return the base-10 logarithm of the given number, computed as the natural
# logarithm divided by the precomputed constant LOG_E_10.
sub log10 {
    my ($value) = @_;
    my $natural_log = log $value;
    return $natural_log / LOG_E_10;
}
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
38 |
87eda806422d
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
earlhaminst
parents:
diff
changeset
|
# Round a positive float to the nearest integer by adding one half and
# truncating the fractional part.
sub positive_round {
    my ($value) = @_;
    my $shifted = $value + 0.5;
    return int $shifted;
}