comparison GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 3:037c3edda16e

Uploaded
author mmaiensc
date Thu, 22 Mar 2012 13:49:52 -0400
parents 003f802d4c7d
children
comparison
equal deleted inserted replaced
2:1a84b8178b45 3:037c3edda16e
1 #!/usr/bin/perl
2 # compares overlaps in the number of unique genes between two sets
3
4 use Getopt::Long;
5
#
# command line arguments
#
# Defaults for every option. They stay package globals because the rest of
# the script (including read_list, which reads $n) uses them directly.
$t1 = "";   # -t1: first .targets file (required)
$t2 = "";   # -t2: second .targets file (required)
$o = "";    # -o: output file; empty string means no gene list is written
$of = 0;    # -of: which set of genes goes to the output file
$n = 1;     # -n: 0 = compare probe ids, 1 = compare gene names

# $of and $n are interpolated below, so the help text shows the defaults
# assigned above; keep this string after those assignments.
$options = "Usage: ./Compare_Targets.pl <OPTIONS>
-t1 .targets file 1 (EMBER output, required)
-t2 .targets file 2 (EMBER output, required)
-o output file (optional, if you want a gene list printed)
output list prints all unique genes
-of output type (default $of)
0 - all shared targets
1 - all targets in list 1 only
2 - all targets in list 2 only
3 - union of list 1 and 2
-n compare gene names or probe ids (0 = ids, 1 = names, default $n)
\n";

GetOptions('t1=s' => \$t1,
           't2=s' => \$t2,
           'o=s'  => \$o,
           'of=i' => \$of,
           'n=i'  => \$n
) || die "\n$options\n";

# Every usage error below is reported with die, exactly like a GetOptions
# failure above: the message goes to STDERR and the exit status is non-zero,
# so a calling shell or pipeline can tell that nothing was compared.
# Each message ends in a newline, so die does not append a file/line suffix.
if( $t1 eq "" ){
    die "\nError: set a value for -t1\n\n$options\n";
}
if( $t2 eq "" ){
    die "\nError: set a value for -t2\n\n$options\n";
}
if( $of != 0 && $of != 1 && $of != 2 && $of != 3 ){
    die "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n";
}
if( $n != 0 && $n != 1 ){
    die "\nError: set -n to be 0 or 1\n\n$options\n";
}
51
#
# read in gene list from each file
#
# read_list returns each file's genes sorted (string order) and de-duplicated;
# the merge below depends on both properties.
@list1 = &read_list( $t1 );
@list2 = &read_list( $t2 );

printf("\nFound %i unique genes in %s, %i in %s\n", scalar(@list1), $t1, scalar(@list2), $t2);

#
# compare lists and print out if desired
#
# A gene list is written only when -o was given; -of selects which of the
# three categories (shared, list 1 only, list 2 only) end up in it.
my $want_shared = ( $o ne "" && ($of == 0 || $of == 3) );
my $want_only1  = ( $o ne "" && ($of == 1 || $of == 3) );
my $want_only2  = ( $o ne "" && ($of == 2 || $of == 3) );

my $out;
if( $o ne "" ){
    # three-argument open: the file name is never interpreted as a mode
    open($out, ">", $o) || die "Error: can't open output file $o: $!\n";
}

my $pos1 = 0;           # cursor into @list1
my $pos2 = 0;           # cursor into @list2
my $last1 = $#list1;
my $last2 = $#list2;
$l1o = 0;               # genes found in list 1 only
$l2o = 0;               # genes found in list 2 only
$l12 = 0;               # genes found in both lists

# Walk both sorted lists in step, always advancing the cursor that points at
# the smaller string (or both cursors when the strings are equal).
while( $pos1 <= $last1 && $pos2 <= $last2 ){
    my $order = $list1[$pos1] cmp $list2[$pos2];
    if( $order == 0 ){
        print {$out} "$list1[$pos1]\n" if $want_shared;
        $l12++;
        $pos1++;
        $pos2++;
    }
    elsif( $order < 0 ){
        print {$out} "$list1[$pos1]\n" if $want_only1;
        $l1o++;
        $pos1++;
    }
    else{
        print {$out} "$list2[$pos2]\n" if $want_only2;
        $l2o++;
        $pos2++;
    }
}

# One list may still have entries once the other is exhausted. None of them
# can have a partner, so they belong to that list's "only" category; leaving
# them out would under-count and truncate the printed list.
while( $pos1 <= $last1 ){
    print {$out} "$list1[$pos1]\n" if $want_only1;
    $l1o++;
    $pos1++;
}
while( $pos2 <= $last2 ){
    print {$out} "$list2[$pos2]\n" if $want_only2;
    $l2o++;
    $pos2++;
}

if( $o ne "" ){
    # buffered write errors only surface at close, so check it
    close($out) || die "Error: can't finish writing $o: $!\n";
}
printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $l1o, $t2, $l2o, $l12);

# stop here so execution never falls through into the sub definitions below
exit;
##############
# read_list( $file )
#
# Reads a .targets file and returns the list of distinct genes it names,
# sorted in string (cmp) order.
#
# Only lines whose first whitespace-separated field is "GENE:" or "TGENE:"
# are used. The gene is taken from field 1+$n of a GENE: line and field 2+$n
# of a TGENE: line, where $n is the script-level -n setting. Lines that do
# not have that field contribute nothing.
#
# A file with no usable lines yields an empty list, so the caller's count is
# 0 rather than 1.
#
# Dies if the file cannot be opened.
sub read_list{
    my ($file) = @_;
    my %seen;       # gene => 1, collapses duplicates as they are read

    # three-argument open with a lexical handle: the name is taken literally
    # and no package-level handle or variable is disturbed by this sub
    open(my $in, "<", $file) || die "Error: can't open file $file\n";
    while( my $line = <$in> ){
        chomp($line);
        my @parts = split(' ', $line);
        next unless @parts;     # blank line

        my $gene;
        if( $parts[0] eq "GENE:" ){
            $gene = $parts[1+$n];
        }
        elsif( $parts[0] eq "TGENE:" ){
            $gene = $parts[2+$n];
        }
        next unless defined $gene;  # other record type, or column missing

        $seen{$gene} = 1;
    }
    close($in);

    my @final = sort { $a cmp $b } keys %seen;
    return @final;
}
136
137
138
139
140
141
142
143
144
145
146
147
148