diff nrify_GOtable.pl @ 0:91261b42c07e draft

"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
author cristian
date Thu, 14 Apr 2022 13:28:05 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nrify_GOtable.pl	Thu Apr 14 13:28:05 2022 +0000
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+# Collapses repeated gene rows of a gene<tab>term;term;... table into one row per gene.
+use strict;
+use warnings;
+
+my $usage= "
+
+nrify_GOtable.pl:
+
+removes duplicate entries for a gene from gene<tab>semicolon-separated GOterms table
+concatenates nonredundant categories for each gene
+
+Misha Matz July 2013, matz\@utexas.edu
+
+";
+
+my $inp=shift or die $usage;
+# three-argument open: the file name can never be read as a mode or a pipe
+open my $in, '<', $inp or die "cannot open input $inp\n";
+
+my @genes; # gene names in first-seen order, so the output order is deterministic
+my %terms; # gene => [ its unique terms, in first-seen order ]
+my %seen;  # gene => { term => count }; exact lookup, so no substring/regex false hits
+
+while(my $line=<$in>){
+	$line=~s/[\r\n]+\z//; # strip LF or CRLF line ending
+	my ($gene,$goline)=split(/\t/,$line);
+	next unless defined $gene and length $gene; # blank line or empty gene column
+	push @genes,$gene unless $terms{$gene};
+	$terms{$gene}||=[]; # a gene without terms is still printed, with an empty term column
+	foreach my $g (split(';',defined $goline ? $goline : '')){
+		next if $g eq '' or $seen{$gene}{$g}++; # drop empty and already-listed terms
+		push @{$terms{$gene}},$g;
+	}
+}
+close $in;
+
+print $_,"\t",join(';',@{$terms{$_}}),"\n" foreach @genes;
\ No newline at end of file