annotate GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d

Uploaded
author mmaiensc
date Wed, 29 Feb 2012 15:03:33 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
1 #!/usr/bin/perl
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
2 # compares overlaps in the number of unique genes between two sets
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
3
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
4 use Getopt::Long;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
5
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
#
# command line arguments
#
# Defaults for every option. These are deliberately left as package globals:
# the rest of the script reads them, and read_list() reads $n directly.
$t1 = "";    # -t1: first .targets file (required)
$t2 = "";    # -t2: second .targets file (required)
$o = "";     # -o:  output file; empty means no gene list is written
$of = 0;     # -of: which category of genes goes to the output file
$n = 1;      # -n:  0 = compare probe ids, 1 = compare gene names

# Usage text. It interpolates the defaults of $of and $n, so it has to be
# built after the defaults above and before GetOptions overwrites them.
$options = "Usage: ./Compare_Targets.pl <OPTIONS>
-t1 .targets file 1 (EMBER output, required)
-t2 .targets file 2 (EMBER output, required)
-o output file (optional, if you want a gene list printed)
output list prints all unique genes
-of output type (default $of)
0 - all shared targets
1 - all targets in list 1 only
2 - all targets in list 2 only
3 - union of list 1 and 2
-n compare gene names or probe ids (0 = ids, 1 = names, default $n)
\n";

GetOptions(
    't1=s' => \$t1,
    't2=s' => \$t2,
    'o=s'  => \$o,
    'of=i' => \$of,
    'n=i'  => \$n,
) or die "\n$options\n";

# Validation failures die (message on STDERR, non-zero exit status), exactly
# like a GetOptions failure above, so a calling script or workflow system can
# tell that the run did not succeed.
die "\nError: set a value for -t1\n\n$options\n" if $t1 eq "";
die "\nError: set a value for -t2\n\n$options\n" if $t2 eq "";

# 'of=i' and 'n=i' already guarantee integers, so a range check is enough
die "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n" if $of < 0 || $of > 3;
die "\nError: set -n to be 0 or 1\n\n$options\n" if $n < 0 || $n > 1;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
51
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
#
# read in gene list from each file
#
# @list1 and @list2 remain package globals; the comparison code that follows
# walks both of them.
@list1 = read_list($t1);
@list2 = read_list($t2);

# report how many entries each list holds
printf("\nFound %i unique genes in %s, %i in %s\n", scalar(@list1), $t1, scalar(@list2), $t2);
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
59
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
#
# compare lists and print out if desired
#
# Both lists are expected to be sorted in string order and free of duplicates
# (read_list() returns them that way), so a single merge-style pass puts every
# gene into exactly one category: list 1 only, list 2 only, or shared.
#
my $out;
if( $o ne "" ){
    open($out, '>', $o) or die "Error: can't open output file $o: $!\n";
}

# Which categories are written to the output file:
# -of 0 = shared, 1 = list 1 only, 2 = list 2 only, 3 = union (all three).
my $print_shared = defined($out) && ($of == 0 || $of == 3);
my $print_only1  = defined($out) && ($of == 1 || $of == 3);
my $print_only2  = defined($out) && ($of == 2 || $of == 3);

my ($pos1, $pos2) = (0, 0);
my ($only1, $only2, $shared) = (0, 0, 0);

while( $pos1 < @list1 && $pos2 < @list2 ){
    my $order = $list1[$pos1] cmp $list2[$pos2];
    if( $order == 0 ){
        print {$out} "$list1[$pos1]\n" if $print_shared;
        $shared++;
        $pos1++;
        $pos2++;
    }
    elsif( $order < 0 ){
        print {$out} "$list1[$pos1]\n" if $print_only1;
        $only1++;
        $pos1++;
    }
    else{
        print {$out} "$list2[$pos2]\n" if $print_only2;
        $only2++;
        $pos2++;
    }
}

# The loop above stops as soon as ONE list is exhausted. Whatever is left in
# the other list cannot have a partner, so it belongs to that list's "only"
# category and must still be counted and (if requested) written out.
for( ; $pos1 < @list1; $pos1++ ){
    print {$out} "$list1[$pos1]\n" if $print_only1;
    $only1++;
}
for( ; $pos2 < @list2; $pos2++ ){
    print {$out} "$list2[$pos2]\n" if $print_only2;
    $only2++;
}

if( defined($out) ){
    # buffered write errors only surface at close
    close($out) or die "Error: can't write output file $o: $!\n";
}
printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $only1, $t2, $only2, $shared);

exit;
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
##############
# read_list( $file )
#
# Read a .targets file and return the genes it lists, sorted and without
# duplicates.
#
# Each line is split on whitespace. Only lines whose first field is "GENE:"
# or "TGENE:" contribute: the value is field 1+$n of a GENE: line and field
# 2+$n of a TGENE: line, where $n is the script-level -n setting
# (0 = ids, 1 = names).
#
# Returns the distinct values in string (cmp) order, or an empty list when
# the file contains no usable lines. Dies if the file cannot be opened.
sub read_list{
    my ($file) = @_;

    # Three-argument open with a lexical handle: the file name is never
    # interpreted as an open mode, and no global handle is left behind.
    open(my $in, '<', $file) or die "Error: can't open file $file\n";

    my %seen;
    while( my $line = <$in> ){
        chomp($line);
        my @parts = split(' ', $line);
        next unless @parts;

        my $gene;
        if( $parts[0] eq "GENE:" ){
            $gene = $parts[1+$n];
        }
        elsif( $parts[0] eq "TGENE:" ){
            $gene = $parts[2+$n];
        }

        # skip other line types, and gene lines too short to hold the
        # requested column
        next unless defined $gene;

        $seen{$gene} = 1;
    }
    close($in);

    # Hash keys are already distinct, so sorting them gives the final list;
    # an input without genes yields an empty list rather than a single
    # undefined entry.
    my @final = sort { $a cmp $b } keys %seen;
    return @final;
}
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
136
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
137
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
138
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
139
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
140
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
141
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
142
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
143
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
144
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
145
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
146
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
147
003f802d4c7d Uploaded
mmaiensc
parents:
diff changeset
148