diff blast_parser.pl @ 0:87eda806422d draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/blast_parser commit 75c6b4d9bd23cdd5f8e5626b1b01f2abba32c274-dirty
author earlhaminst
date Mon, 12 Dec 2016 07:13:57 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/blast_parser.pl	Mon Dec 12 07:13:57 2016 -0500
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+use List::Util qw(min max);
+
+# A simple Perl parser to convert a BLAST 12-column or 24-column output into a
+# 3-column input for hcluster_sg (id1, id2, weight):
+# blast_parser.pl <file>
+
+use constant LOG_E_10 => log(10);
+
+# Read the tab-separated BLAST file named by the first command-line argument.
+my $file1 = $ARGV[0];
+defined $file1 or die "Usage: $0 <blast_tabular_file>\n";
+open my $fh1, '<', $file1 or die "Cannot open '$file1': $!\n";
+
+while (my $line = <$fh1>) {
+    chomp $line;
+    my @row = split(/\t/, $line);
+    # Skip lines without an evalue (column 11) and self-matching hits.
+    next if @row < 11 || $row[0] eq $row[1];
+
+    # Weight is -log10(evalue), rounded and clamped to 0..100 (evalues well
+    # above 1 would otherwise go negative). An evalue of 0 stays at 100.
+    my $weight = 100;
+    if ($row[10] != 0) {
+        $weight = max(0, min(100, positive_round(-1 * log10($row[10]))));
+    }
+    print "$row[0]\t$row[1]\t$weight\n";
+}
+close $fh1;
+
+# Base-10 logarithm via change of base: ln(x) divided by the LOG_E_10 constant
+sub log10 {
+    my ($value) = @_;
+    return log($value) / LOG_E_10;
+}
+
+# Nearest integer for a positive float: add 0.5, then let int() truncate
+sub positive_round {
+    my ($value) = @_;
+    return int(0.5 + $value);
+}