Mercurial > repos > cristian > rbgoa
diff nrify_GOtable.pl @ 0:91261b42c07e draft
"planemo upload commit 25eebba0c98dd7a5a703412be90e97f13f66b5bc"
| author | cristian |
|---|---|
| date | Thu, 14 Apr 2022 13:28:05 +0000 |
| parents | |
| children | |
line wrap: on
line diff
#!/usr/bin/perl

use strict;
use warnings;

my $usage= "

nrify_GOtable.pl:

removes duplicate entries for a gene from gene<tab>semicolon-separated GOterms table
concatenates nonredundant categories for each gene

Misha Matz July 2013, matz\@utexas.edu

";

# nrify_table($fh)
#
# Reads a gene<tab>semicolon-separated-GO-terms table from the open
# filehandle $fh and merges all rows that share the same gene.
#
# Returns a list of strings "gene<tab>term;term;..." (no trailing newline),
# one per distinct gene. Genes appear in the order they were first seen in
# the input, and so do the terms of each gene; every term is listed once
# per gene.
#
# Only the first two tab-separated columns of a row are used. Blank lines
# are skipped, and a row without a second column contributes no terms.
sub nrify_table {
    my ($fh) = @_;

    my @gene_order;    # genes in order of first appearance
    my %terms_of;      # gene => [ unique terms, in first-seen order ]
    my %seen;          # gene => { term => 1 } for exact-match lookups

    while (my $line = <$fh>) {
        # Strip the line ending, tolerating CRLF files.
        $line =~ s/\r?\n\z//;
        next if $line eq '';

        my ($gene, $goline) = split /\t/, $line;
        $gene   = '' unless defined $gene;
        $goline = '' unless defined $goline;

        # Use exists(), not truthiness: a gene whose first row carried no
        # terms must still be remembered rather than overwritten later.
        if (!exists $terms_of{$gene}) {
            push @gene_order, $gene;
            $terms_of{$gene} = [];
        }

        for my $term (split /;/, $goline) {
            next if $term eq '';
            # Exact string comparison via a hash: a regex/substring test
            # would treat "GO:0" as already present once "GO:00" is stored.
            next if $seen{$gene}{$term}++;
            push @{ $terms_of{$gene} }, $term;
        }
    }

    return map { $_ . "\t" . join(';', @{ $terms_of{$_} }) } @gene_order;
}

# main(@args)
#
# Command-line entry point: the first argument is the input table file.
# Dies with the usage text when no file is given, and with an error message
# when the file cannot be opened. Prints the merged table to STDOUT.
sub main {
    my $inp = shift or die $usage;

    open my $in, '<', $inp or die "cannot open input $inp: $!\n";
    my @rows = nrify_table($in);
    close $in;

    print $_, "\n" for @rows;
    return;
}

# Run only when executed as a script, so the file can also be loaded
# (require/do) without side effects.
main(@ARGV) unless caller;

1;
