Mercurial > repos > mmaiensc > ember
comparison GALAXY_FILES/tools/EMBER/Compare_Targets.pl @ 0:003f802d4c7d
Uploaded
author | mmaiensc |
---|---|
date | Wed, 29 Feb 2012 15:03:33 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:003f802d4c7d |
---|---|
1 #!/usr/bin/perl | |
2 # compares overlaps in the number of unique genes between two sets | |
3 | |
4 use Getopt::Long; | |
5 | |
#
# command line arguments
#
# Defaults; each one can be overridden by the matching command-line switch.
$t1 = "";   # -t1: first .targets file (required)
$t2 = "";   # -t2: second .targets file (required)
$o  = "";   # -o : file to write the gene list to; empty means no list is written
$of = 0;    # -of: which genes go into the list (0 shared, 1 list 1 only, 2 list 2 only, 3 union)
$n  = 1;    # -n : 0 compares probe ids, 1 compares gene names

# Usage text. $of and $n are interpolated here, so the text shows the defaults
# set above (this string is built before GetOptions runs).
$options = "Usage: ./Compare_Targets.pl <OPTIONS>
-t1 .targets file 1 (EMBER output, required)
-t2 .targets file 2 (EMBER output, required)
-o output file (optional, if you want a gene list printed)
output list prints all unique genes
-of output type (default $of)
0 - all shared targets
1 - all targets in list 1 only
2 - all targets in list 2 only
3 - union of list 1 and 2
-n compare gene names or probe ids (0 = ids, 1 = names, default $n)
\n";

GetOptions(
    't1=s' => \$t1,
    't2=s' => \$t2,
    'o=s'  => \$o,
    'of=i' => \$of,
    'n=i'  => \$n,
) || die "\n$options\n";

# Validation failures die, exactly like the GetOptions failure above, so that
# every bad invocation reports on STDERR and ends with a non-zero exit status
# (previously these printed to STDOUT and exited with status 0).
if( $t1 eq "" ){
    die "\nError: set a value for -t1\n\n$options\n";
}
if( $t2 eq "" ){
    die "\nError: set a value for -t2\n\n$options\n";
}
if( $of != 0 && $of != 1 && $of != 2 && $of != 3 ){
    die "\nError: set -of to be 0, 1, 2, or 3\n\n$options\n";
}
if( $n != 0 && $n != 1 ){
    die "\nError: set -n to be 0 or 1\n\n$options\n";
}
51 | |
#
# read in gene list from each file
#
# read_list returns each file's genes sorted (string order) with duplicates
# removed; the merge below depends on both of those properties.
@list1 = read_list( $t1 );
@list2 = read_list( $t2 );

printf("\nFound %i unique genes in %s, %i in %s\n", scalar(@list1), $t1, scalar(@list2), $t2);

#
# compare lists and print out if desired
#
# The gene list is only written when -o was given.
my $out;
if( $o ne "" ){
    open($out, '>', $o) || die "Error: can't open output file $o: $!\n";
}

my $pos1   = 0;   # next unread index in @list1
my $pos2   = 0;   # next unread index in @list2
my $only1  = 0;   # genes seen only in list 1
my $only2  = 0;   # genes seen only in list 2
my $shared = 0;   # genes seen in both lists

# Merge-walk the two sorted lists. The loop runs until BOTH lists are used up:
# once one list is exhausted, everything left in the other one is unique to it
# and still has to be counted (and written, if requested).
while( $pos1 <= $#list1 || $pos2 <= $#list2 ){
    my $order;
    if( $pos1 > $#list1 ){
        $order = 1;     # list 1 exhausted: current list 2 gene is list-2 only
    }
    elsif( $pos2 > $#list2 ){
        $order = -1;    # list 2 exhausted: current list 1 gene is list-1 only
    }
    else{
        $order = $list1[$pos1] cmp $list2[$pos2];
    }

    if( $order == 0 ){
        if( $out && ($of == 0 || $of == 3) ){
            print $out "$list1[$pos1]\n";
        }
        $shared++;
        $pos1++;
        $pos2++;
    }
    elsif( $order < 0 ){
        if( $out && ($of == 1 || $of == 3) ){
            print $out "$list1[$pos1]\n";
        }
        $only1++;
        $pos1++;
    }
    else{
        if( $out && ($of == 2 || $of == 3) ){
            print $out "$list2[$pos2]\n";
        }
        $only2++;
        $pos2++;
    }
}

# Buffered write errors only surface at close, so check it.
if( $out ){
    close($out) || die "Error: can't write output file $o: $!\n";
}
printf("\n%s only: %i\n%s only: %i\nshared: %i\n\n", $t1, $only1, $t2, $only2, $shared);





exit;
##############
# read_list( $file [, $use_names] )
#
# Reads a .targets file and returns the distinct genes named on its "GENE:"
# and "TGENE:" lines, sorted in string order.
#
#   $file       path of the file to read; dies if it cannot be opened
#   $use_names  optional column selector; when omitted the script-wide $n is
#               used (per the usage text: 0 = probe ids, 1 = gene names).
#               The value is taken from whitespace-separated field
#               1+$use_names on GENE: lines and 2+$use_names on TGENE: lines
#               (field 0 is the tag itself).
#
# Returns a list; it is empty when the file contains no matching lines.
# Lines that are too short to contain the requested field are skipped.
sub read_list{
    my ($file, $use_names) = @_;

    # Fall back to the global -n setting, then to 0 if that is unset too.
    $use_names = $n unless defined $use_names;
    $use_names = 0  unless defined $use_names;

    open(my $in, '<', $file) || die "Error: can't open file $file\n";

    # Hash keys give us de-duplication for free.
    my %seen;
    while( my $line = <$in> ){
        chomp($line);
        my @parts = split(' ', $line);
        next unless @parts;

        my $gene;
        if( $parts[0] eq "GENE:" ){
            $gene = $parts[1 + $use_names];
        }
        elsif( $parts[0] eq "TGENE:" ){
            $gene = $parts[2 + $use_names];
        }

        # Not a gene line, or the requested column is missing.
        next unless defined $gene;
        $seen{$gene} = 1;
    }
    close($in);

    my @final = sort { $a cmp $b } keys %seen;
    return @final;
}
136 | |
137 | |
138 | |
139 | |
140 | |
141 | |
142 | |
143 | |
144 | |
145 | |
146 | |
147 | |
148 |