Mercurial > repos > mmaiensc > ember
diff GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d
Uploaded
author | mmaiensc |
---|---|
date | Wed, 29 Feb 2012 15:03:33 -0500 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/perl
# Compares the unique genes listed in two EMBER .targets files.
#
# Prints how many genes appear only in the first file, only in the second
# file, and in both.  With -o, also writes one of those gene sets (chosen
# with -of) to an output file, one gene per line, in sorted order.

use strict;
use warnings;
use Getopt::Long;

#
# command line arguments
#
my $t1 = "";    # -t1: first .targets file (required)
my $t2 = "";    # -t2: second .targets file (required)
my $o  = "";    # -o : optional output file for a gene list
my $of = 0;     # -of: which gene set is written to -o (0..3, see usage)
my $n  = 1;     # -n : 0 = compare probe ids, 1 = compare gene names

# The defaults above are interpolated into the usage text, so this string
# has to be built after they are assigned.
my $options = "Usage: ./Compare_Targets.pl <OPTIONS>
	-t1	.targets file 1 (EMBER output, required)
	-t2	.targets file 2 (EMBER output, required)
	-o	output file (optional, if you want a gene list printed)
		  output list prints all unique genes
	-of	output type (default $of)
		  0 - all shared targets
		  1 - all targets in list 1 only
		  2 - all targets in list 2 only
		  3 - union of list 1 and 2
	-n	compare gene names or probe ids (0 = ids, 1 = names, default $n)
\n";

# Only run the command-line driver when this file is executed directly;
# when it is loaded from another file the subs below are merely defined.
exit( run() ) unless caller;

##############
# Command-line driver: parses and validates the options, reads both gene
# lists, prints the summary counts and, if -o was given, writes the gene
# set selected by -of.  Returns 0 on success; dies with a message (and the
# usage text for option errors) on any failure.
sub run {
    GetOptions(
        't1=s' => \$t1,
        't2=s' => \$t2,
        'o=s'  => \$o,
        'of=i' => \$of,
        'n=i'  => \$n
    ) || die "\n$options\n";

    # Invalid options are fatal: report on STDERR with a non-zero status,
    # the same way a GetOptions failure or an unreadable input file does.
    if ( $t1 eq "" ) {
        die "\nError: set a value for -t1\n\n$options\n";
    }
    if ( $t2 eq "" ) {
        die "\nError: set a value for -t2\n\n$options\n";
    }
    if ( $of != 0 && $of != 1 && $of != 2 && $of != 3 ) {
        die "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n";
    }
    if ( $n != 0 && $n != 1 ) {
        die "\nError: set -n to be 0 or 1\n\n$options\n";
    }

    #
    # read in gene list from each file
    #
    my @list1 = read_list( $t1, $n );
    my @list2 = read_list( $t2, $n );

    printf( "\nFound %i unique genes in %s, %i in %s\n",
        scalar @list1, $t1, scalar @list2, $t2 );

    #
    # compare lists and print out if desired
    #
    my $out;
    if ( $o ne "" ) {
        open( $out, '>', $o )
            or die "Error: can't open output file $o: $!\n";
    }

    # Indexed by the tag from compare_lists: 0 shared, 1 list 1 only,
    # 2 list 2 only.
    my @count = ( 0, 0, 0 );
    for my $entry ( compare_lists( \@list1, \@list2 ) ) {
        my ( $gene, $tag ) = @{$entry};
        $count[$tag]++;

        # The tags coincide with the -of codes 0..2; -of 3 selects all.
        if ( defined $out && ( $of == 3 || $of == $tag ) ) {
            print {$out} "$gene\n";
        }
    }

    if ( defined $out ) {
        # Buffered write errors only surface when the handle is closed.
        close($out) or die "Error: can't write output file $o: $!\n";
    }

    printf( "\n%s only: %i\n%s only: %i\nshared: %i\n\n",
        $t1, $count[1], $t2, $count[2], $count[0] );

    return 0;
}

##############
# Merge two sorted, duplicate-free gene lists and classify every gene.
#   $_[0], $_[1]  array refs; both sorted with string comparison (cmp)
# Returns a list of [ gene, tag ] pairs in sorted order, where tag is
#   0 - gene is in both lists
#   1 - gene is in the first list only
#   2 - gene is in the second list only
sub compare_lists {
    my ( $first, $second ) = @_;

    my @tagged;
    my $i = 0;
    my $j = 0;
    while ( $i < @{$first} && $j < @{$second} ) {
        my $order = $first->[$i] cmp $second->[$j];
        if ( $order == 0 ) {
            push( @tagged, [ $first->[$i], 0 ] );
            $i++;
            $j++;
        }
        elsif ( $order < 0 ) {
            push( @tagged, [ $first->[$i], 1 ] );
            $i++;
        }
        else {
            push( @tagged, [ $second->[$j], 2 ] );
            $j++;
        }
    }

    # Once one list runs out, whatever remains in the other list is
    # exclusive to it.  At most one of these two slices is non-empty.
    push( @tagged, map { [ $_, 1 ] } @{$first}[ $i .. $#{$first} ] );
    push( @tagged, map { [ $_, 2 ] } @{$second}[ $j .. $#{$second} ] );

    return @tagged;
}

##############
# Read the genes from a .targets file and return them sorted (string
# order) with duplicates removed.
#   $_[0]  path of the .targets file
#   $_[1]  optional column selector, 0 or 1; defaults to the -n setting
# Only lines whose first whitespace-separated field is "GENE:" or "TGENE:"
# are used; the gene is field 1+selector of a GENE: line and field
# 2+selector of a TGENE: line.  Lines too short to contain that field are
# skipped.  Dies if the file cannot be opened.
sub read_list {
    my ( $file, $field ) = @_;
    $field = $n unless defined $field;

    open( my $in, '<', $file ) or die "Error: can't open file $file\n";

    my %seen;
    while ( my $line = <$in> ) {
        chomp($line);
        my @parts = split( ' ', $line );
        next unless @parts;    # blank line

        my $gene;
        if ( $parts[0] eq "GENE:" ) {
            $gene = $parts[ 1 + $field ];
        }
        elsif ( $parts[0] eq "TGENE:" ) {
            $gene = $parts[ 2 + $field ];
        }
        next unless defined $gene;

        $seen{$gene} = 1;
    }
    close($in);

    # Must match the string ordering compare_lists relies on.
    my @unique = sort { $a cmp $b } keys %seen;
    return @unique;
}

1;