Mercurial > repos > mmaiensc > ember
view GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d
Uploaded
author | mmaiensc |
---|---|
date | Wed, 29 Feb 2012 15:03:33 -0500 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/perl # compares overlaps in the number of unique genes between two sets use Getopt::Long; # # command line arguments # $t1 = ""; $t2 = ""; $o = ""; $of = 0; $n = 1; $options = "Usage: ./Compare_Targets.pl <OPTIONS> -t1 .targets file 1 (EMBER output, required) -t2 .targets file 2 (EMBER output, required) -o output file (optional, if you want a gene list printed) output list prints all unique genes -of output type (default $of) 0 - all shared targets 1 - all targets in list 1 only 2 - all targets in list 2 only 3 - union of list 1 and 2 -n compare gene names or probe ids (0 = ids, 1 = names, default $n) \n"; GetOptions('t1=s' => \$t1, 't2=s' => \$t2, 'o=s' => \$o, 'of=i' => \$of, 'n=i' => \$n ) || die "\n$options\n"; if( $t1 eq "" ){ print "\nError: set a value for -t1\n\n$options\n"; exit; } if( $t2 eq "" ){ print "\nError: set a value for -t2\n\n$options\n"; exit; } if( $of != 0 && $of != 1 && $of != 2 && $of != 3 ){ print "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n"; exit; } if( $n != 0 && $n != 1 ){ print "\nError: set -n to be 0 or 1\n\n$options\n"; exit; } # # read in gene list from each file # @list1 = &read_list( $t1 ); @list2 = &read_list( $t2 ); printf("\nFound %i unique genes in %s, %i in %s\n", $#list1+1, $t1, $#list2+1, $t2); # # compare lists and print out if desired # if( $o ne "" ){open(OUT,">$o");} $i = 0; $j = 0; $end1 = $#list1; $end2 = $#list2; $l1o = (); $l2o = (); $l12 = (); while( $i<= $end1 && $j<= $end2 ){ if( $list1[$i] eq $list2[$j] ){ if( $o ne "" && ($of == 0 || $of == 3) ){ print OUT "$list1[$i]\n"; } $l12++; $i++; $j++; } elsif( $list1[$i] lt $list2[$j] ){ if( $o ne "" && ($of == 1 || $of == 3) ){ print OUT "$list1[$i]\n"; } $l1o++; $i++; } else{ if( $o ne "" && ($of == 2 || $of == 3) ){ print OUT "$list2[$j]\n"; } $l2o++; $j++; } } if( $o ne "" ){close(OUT);} printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $l1o, $t2, $l2o, $l12); exit; ############## # read in gene list from .targets file and sort it, then only print those genes that are unique sub read_list{ my @rval; my @sval; my @final; @rval = (); open(IN,"$_[0]") || die "Error: can't open file $_[0]\n"; while($line = <IN>){ chomp($line); @parts = split(' ',$line); if( $parts[0] eq "GENE:" ){ push(@rval, $parts[1+$n]); } if( $parts[0] eq "TGENE:" ){ push(@rval, $parts[2+$n]); } } close(IN); @sval = sort{ $a cmp $b } @rval; @final = (); push(@final, @sval[0]); for($i=1; $i<= $#sval; $i++){ if( $sval[$i] ne $sval[$i-1] ){ push(@final, $sval[$i]); } } return @final; }