view GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d

Uploaded
author mmaiensc
date Wed, 29 Feb 2012 15:03:33 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl
# compares overlaps in the number of unique genes between two sets

use Getopt::Long;

#
# command line arguments
#
$t1 = "";
$t2 = "";
$o  = "";
$of = 0;
$n = 1;

$options = "Usage: ./Compare_Targets.pl <OPTIONS>
	-t1 .targets file 1 (EMBER output, required)
	-t2 .targets file 2 (EMBER output, required)
	-o  output file (optional, if you want a gene list printed)
	    output list prints all unique genes
	-of output type (default $of)
	    0 - all shared targets
	    1 - all targets in list 1 only
	    2 - all targets in list 2 only
	    3 - union of list 1 and 2
	-n  compare gene names or probe ids (0 = ids, 1 = names, default $n)
\n";

GetOptions('t1=s' => \$t1,
	   't2=s' => \$t2,
	   'o=s'  => \$o,
	   'of=i' => \$of,
	   'n=i'  => \$n
) || die "\n$options\n";

if( $t1 eq "" ){
	print "\nError: set a value for -t1\n\n$options\n";
	exit;
}
if( $t2 eq "" ){
	print "\nError: set a value for -t2\n\n$options\n";
	exit;
}
if( $of != 0 && $of != 1 && $of != 2 && $of != 3 ){
	print "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n";
	exit;
}
if( $n != 0 && $n != 1 ){
	print "\nError: set -n to be 0 or 1\n\n$options\n";
	exit;
}

#
# read in gene list from each file
#
@list1 = &read_list( $t1 );
@list2 = &read_list( $t2 );

printf("\nFound %i unique genes in %s, %i in %s\n", $#list1+1, $t1, $#list2+1, $t2);

#
# compare lists and print out if desired
#
if( $o ne "" ){open(OUT,">$o");}
$i = 0;
$j = 0;
$end1 = $#list1;
$end2 = $#list2;
$l1o = ();
$l2o = ();
$l12 = ();
while( $i<= $end1 && $j<= $end2 ){
	if( $list1[$i] eq $list2[$j] ){
		if( $o ne "" && ($of == 0 || $of == 3) ){
			print OUT "$list1[$i]\n";
		}
		$l12++;
		$i++;
		$j++;
	}
	elsif( $list1[$i] lt $list2[$j] ){
		if( $o ne "" && ($of == 1 || $of == 3) ){
			print OUT "$list1[$i]\n";
		}
		$l1o++;
		$i++;
	}
	else{
		if( $o ne "" && ($of == 2 || $of == 3) ){
			print OUT "$list2[$j]\n";
		}
		$l2o++;
		$j++;
	}
}
if( $o ne "" ){close(OUT);}
printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $l1o, $t2, $l2o, $l12);
		




exit;
##############
# read in gene list from .targets file and sort it, then only print those genes that are unique
sub read_list{
	my @rval;
	my @sval;
	my @final;

	@rval = ();
	open(IN,"$_[0]") || die "Error: can't open file $_[0]\n";
	while($line = <IN>){
		chomp($line);
		@parts = split(' ',$line);
		if( $parts[0] eq "GENE:" ){
			push(@rval, $parts[1+$n]);
		}
		if( $parts[0] eq "TGENE:" ){
			push(@rval, $parts[2+$n]);
		}
	}
	close(IN);
	
	@sval = sort{ $a cmp $b } @rval;

	@final = ();
	push(@final, @sval[0]);
	for($i=1; $i<= $#sval; $i++){
		if( $sval[$i] ne $sval[$i-1] ){
			push(@final, $sval[$i]);
		}
	}
	
	return @final;
}