diff GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d

Uploaded
author mmaiensc
date Wed, 29 Feb 2012 15:03:33 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/GALAXY_FILES/tools/EMBER/Compare_Targets.pl	Wed Feb 29 15:03:33 2012 -0500
@@ -0,0 +1,148 @@
+#!/usr/bin/perl
+# compares overlaps in the number of unique genes between two sets
+
+use Getopt::Long;
+
+#
+# command line arguments
+#
+$t1 = "";
+$t2 = "";
+$o  = "";
+$of = 0;
+$n = 1;
+
+$options = "Usage: ./Compare_Targets.pl <OPTIONS>
+	-t1 .targets file 1 (EMBER output, required)
+	-t2 .targets file 2 (EMBER output, required)
+	-o  output file (optional, if you want a gene list printed)
+	    output list prints all unique genes
+	-of output type (default $of)
+	    0 - all shared targets
+	    1 - all targets in list 1 only
+	    2 - all targets in list 2 only
+	    3 - union of list 1 and 2
+	-n  compare gene names or probe ids (0 = ids, 1 = names, default $n)
+\n";
+
+GetOptions('t1=s' => \$t1,
+	   't2=s' => \$t2,
+	   'o=s'  => \$o,
+	   'of=i' => \$of,
+	   'n=i'  => \$n
+) || die "\n$options\n";
+
+if( $t1 eq "" ){
+	print "\nError: set a value for -t1\n\n$options\n";
+	exit;
+}
+if( $t2 eq "" ){
+	print "\nError: set a value for -t2\n\n$options\n";
+	exit;
+}
+if( $of != 0 && $of != 1 && $of != 2 && $of != 3 ){
+	print "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n";
+	exit;
+}
+if( $n != 0 && $n != 1 ){
+	print "\nError: set -n to be 0 or 1\n\n$options\n";
+	exit;
+}
+
+#
+# read in gene list from each file
+#
+@list1 = &read_list( $t1 );
+@list2 = &read_list( $t2 );
+
+printf("\nFound %i unique genes in %s, %i in %s\n", $#list1+1, $t1, $#list2+1, $t2);
+
+#
+# compare lists and print out if desired
+#
+if( $o ne "" ){open(OUT,">$o");}
+$i = 0;
+$j = 0;
+$end1 = $#list1;
+$end2 = $#list2;
+$l1o = ();
+$l2o = ();
+$l12 = ();
+while( $i<= $end1 && $j<= $end2 ){
+	if( $list1[$i] eq $list2[$j] ){
+		if( $o ne "" && ($of == 0 || $of == 3) ){
+			print OUT "$list1[$i]\n";
+		}
+		$l12++;
+		$i++;
+		$j++;
+	}
+	elsif( $list1[$i] lt $list2[$j] ){
+		if( $o ne "" && ($of == 1 || $of == 3) ){
+			print OUT "$list1[$i]\n";
+		}
+		$l1o++;
+		$i++;
+	}
+	else{
+		if( $o ne "" && ($of == 2 || $of == 3) ){
+			print OUT "$list2[$j]\n";
+		}
+		$l2o++;
+		$j++;
+	}
+}
+if( $o ne "" ){close(OUT);}
+printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $l1o, $t2, $l2o, $l12);
+		
+
+
+
+
+exit;
+##############
+# read in gene list from .targets file and sort it, then only print those genes that are unique
+sub read_list{
+	my @rval;
+	my @sval;
+	my @final;
+
+	@rval = ();
+	open(IN,"$_[0]") || die "Error: can't open file $_[0]\n";
+	while($line = <IN>){
+		chomp($line);
+		@parts = split(' ',$line);
+		if( $parts[0] eq "GENE:" ){
+			push(@rval, $parts[1+$n]);
+		}
+		if( $parts[0] eq "TGENE:" ){
+			push(@rval, $parts[2+$n]);
+		}
+	}
+	close(IN);
+	
+	@sval = sort{ $a cmp $b } @rval;
+
+	@final = ();
+	push(@final, @sval[0]);
+	for($i=1; $i<= $#sval; $i++){
+		if( $sval[$i] ne $sval[$i-1] ){
+			push(@final, $sval[$i]);
+		}
+	}
+	
+	return @final;
+}
+
+
+
+
+
+
+
+
+
+
+
+
+