Mercurial > repos > steffen > covenntree
comparison coVennTree/coVennTree.pl @ 0:745aede829e9 draft default tip
Imported from capsule None
author | steffen |
---|---|
date | Fri, 30 Jan 2015 09:55:45 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:745aede829e9 |
---|---|
1 #!/usr/bin/perl | |
2 use strict; | |
3 use File::Basename; | |
4 use List::MoreUtils qw( minmax ); | |
5 | |
6 # -------------------------------------------------------------------------------------------------- | |
7 # author: steffen lott | |
8 # mail: steffen.lott@uni-freiburg.de | |
9 # date: 06-10-2014 | |
10 # version: 1.6 | |
11 # | |
12 # description: | |
13 # The tool converts an output from MEGAN in a special network which can visuallized with | |
14 # cytoscape. Gaper produces two files, the first one contains the network and the second one | |
15 # describes the attributes of the network. | |
16 # -------------------------------------------------------------------------------------------------- | |
17 | |
18 # return version number | |
19 if (@ARGV == 0) { | |
20 print "CoVennTree-Version 1.6\n"; | |
21 print "COMMAND\n"; | |
22 print "coventree argv0 argv1 argv2 argv3 argv4\n"; | |
23 print "--------------\n"; | |
24 print "argv0 = input file\n"; | |
25 print "argv1 = color mode [1,4]\n"; | |
26 print "argv2 = transformation function [1,7]\n"; | |
27 print "argv3 = only leaf information => 0 ; all information => 1\n"; | |
28 print "argv4 = output file name network\n"; | |
29 print "argv5 = output file name attributes\n"; | |
30 exit; | |
31 } | |
32 | |
33 | |
34 | |
35 | |
36 # container to represent the network | |
37 my @network = (); | |
38 | |
39 | |
40 | |
41 # 0 PARAMETER_______________ | |
42 # read argument from command-line | |
43 # important: DSV -> taxon-path, count(s) -> assigned -> tab | |
44 my $megan_file = $ARGV[0]; | |
45 | |
46 | |
47 # 1 PARAMETER_______________ | |
48 my $colorMode; | |
49 # color mode for venn-diagrams 0,1,2,3,4 | |
50 if(defined $ARGV[1]){ | |
51 $colorMode = $ARGV[1]; | |
52 }else{ | |
53 $colorMode = 3; | |
54 } | |
55 | |
56 | |
57 # 2 PARAMETER_______________ | |
58 # 2 different transformations functions | |
59 my $transFnc = ""; | |
60 if(defined $ARGV[2]){ # small datasets | |
61 $transFnc = $ARGV[2]; | |
62 }else{ | |
63 $transFnc = 1; | |
64 } | |
65 | |
66 | |
67 # 3 PARAMETER_______________ | |
68 # the user can switch between "only leaf information" | |
69 # or the complete tree information. the last one takes also the not assigned reads | |
70 # and creates artificial nodes to keep this number | |
71 my $onlyLeafs; | |
72 if(defined $ARGV[3]){ | |
73 if($ARGV[3] == 0){ | |
74 $onlyLeafs = "on"; | |
75 }elsif($ARGV[3] == 1){ | |
76 $onlyLeafs = "off"; | |
77 } | |
78 }else{ # all information will be used! not assigned and assigned | |
79 $onlyLeafs = "off"; | |
80 } | |
81 | |
82 # 4 PARAMETER_______________ | |
83 # output -> network | |
84 my $out_network = $ARGV[4]; | |
85 | |
86 # 5 PARAMETER_______________ | |
87 # output -> attributes | |
88 my $out_attributes = $ARGV[5]; | |
89 | |
90 | |
91 | |
92 # check the input format of the file. only a file with exactly three datasets are excepted. the other one will fill up with zeros | |
93 | |
94 | |
95 # read-in MEGAN-file | |
96 # if #{data-sets} = 1 -> no heade line | |
97 # if #{data-sets} > 1 -> heade line " #Datasets set1 set2 ..." | |
98 open(inFile , "<$megan_file") || die "File not found - \"Path-File\"!\n"; | |
99 my @pairIds = (); | |
100 my $header = ""; | |
101 my @input_file = (); | |
102 my @numberOfSets = (); | |
103 | |
104 while(<inFile>){ | |
105 chomp($_); | |
106 if($_ =~ /^#/){ | |
107 $header = $_; | |
108 @numberOfSets = split("\t", $_); | |
109 }else{ | |
110 #print @numberOfSets . "\n"; | |
111 # check the number of datasets are included | |
112 if(@numberOfSets == 0 || @numberOfSets == 1 || @numberOfSets > 4){ # no set is in the file | |
113 print "Error: File doesn't contain any dataset or contain more than three!"; | |
114 exit; | |
115 }elsif(@numberOfSets == 2){ # only one set is in the file -> add 2x zeros | |
116 $_ .= "\t" . 0 . "\t" . 0; | |
117 }elsif(@numberOfSets == 3){ # only two sets are in the file -> add 1x zeros | |
118 $_ .= "\t" . 0; | |
119 } | |
120 | |
121 addToNetwork($_); | |
122 push(@input_file, $_); | |
123 } | |
124 } | |
125 close(inFile); | |
126 | |
127 | |
128 # -------------------------------------------------------------------------------------------------------------- | |
129 # -------------------------------------------------------------------------------------------------------------- | |
130 # (1) PREPROCESSING: detect all leaf nodes | |
131 my $modifiedInput = detectNonLeafs(); | |
132 | |
133 # (2) MAIN COMPUTATION: compute deep by deep (path deep ex. root;Viruses; => deep 2) | |
134 my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp(); | |
135 | |
136 # (3) VENN-END-PREPERATION: sum up all single values (d1-d3), transform abs values into | |
137 my $vennToStore = vennForCytoscape($vennClusterOut, $specialNumberOut); | |
138 | |
139 # (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the attributes | |
140 storeNetwork(); | |
141 store2FileVenn($vennToStore); | |
142 # -------------------------------------------------------------------------------------------------------------- | |
143 # -------------------------------------------------------------------------------------------------------------- | |
144 | |
145 | |
146 | |
147 | |
148 sub store2FileVenn{ | |
149 my $outVenn = $_[0]; | |
150 # test | |
151 my $tmpFileName = $out_attributes; | |
152 | |
153 #my $tmpFileName = "./network.venn"; | |
154 open(FILE , ">$tmpFileName") || die "File can't be written - \"venn - File\"!\n"; | |
155 print FILE join("\n", @{$outVenn}) . "\n"; | |
156 close(FILE); | |
157 } | |
158 | |
159 | |
160 sub vennForCytoscape{ | |
161 my $vennCluster = $_[0]; | |
162 my $specialNum = $_[1]; | |
163 my $specNformat = 0; | |
164 my @out = (); | |
165 # datastructure $vennCluster=> vennCluster[]{}{} => values | |
166 #delete $vennCluster->[0]{"no"}; | |
167 my $frameSize = 0; | |
168 my $values = 0; | |
169 my $googleURL = ""; | |
170 my $outStr = ""; | |
171 | |
172 for(my $i = 0 ; $i < @{$vennCluster}; $i++){ | |
173 while ( my($key, $value) = each %{$vennCluster->[$i]} ){ | |
174 while ( my($key2, $value2) = each %{$vennCluster->[$i]{$key}} ){ | |
175 $values = $vennCluster->[$i]{$key}{$key2}; | |
176 $frameSize = getCorrectedFrameSize($values); | |
177 | |
178 if(defined $specialNum->[$i]{$key}{$key2}){ | |
179 $specNformat = $key2 . "[" . sprintf("%.3f", $specialNum->[$i]{$key}{$key2}) . "]"; | |
180 }else{ | |
181 $specNformat = $key2; | |
182 } | |
183 | |
184 # old version, this version works pretty well | |
185 #$frameSize = getFrameSize($values); | |
186 $googleURL = computeGoogleApiStrRotation($frameSize,$values,$colorMode); | |
187 $outStr = $key2 . "\t" . $googleURL . "\t" . $specNformat . "\t" . $values; | |
188 push(@out, $outStr); | |
189 } | |
190 } | |
191 } | |
192 return \@out; | |
193 } | |
194 | |
195 | |
196 # this function keep the biggest node in the lowest depth,... | |
197 sub computeGoogleApiStrRotation{ | |
198 my $frameSize = $_[0]; | |
199 my $values = $_[1]; | |
200 my $colMode = $_[2]; | |
201 my @relVal = (); | |
202 my @col = (); | |
203 my %sort = (); | |
204 my @store = (); | |
205 my %ovHash = (); | |
206 my @storeOldPos = (); | |
207 my @spVal = split(" ", $values); | |
208 my $sum = $spVal[0] + $spVal[1] + $spVal[2]; | |
209 | |
210 # user color-mode | |
211 if($colMode == 0){ | |
212 $col[0] = "18A3F2"; $col[1] = "FA0800"; $col[2] = "FFF905"; | |
213 }elsif($colMode == 1){ | |
214 $col[0] = "FF2A00"; $col[1] = "9CFF00"; $col[2] = "00CCFF"; | |
215 }elsif($colMode == 2){ | |
216 $col[0] = "B4FF00"; $col[1] = "FF00C6"; $col[2] = "00AEFF"; | |
217 }elsif($colMode == 3){ | |
218 $col[0] = "82FF00"; $col[1] = "7E00FF"; $col[2] = "FF003B"; | |
219 }elsif($colMode == 4){ | |
220 $col[0] = "1A1A1A"; $col[1] = "8A8A8A"; $col[2] = "C7C7C7"; | |
221 } | |
222 | |
223 $sort{"0"} = $spVal[0]; $sort{"1"} = $spVal[1]; $sort{"2"} = $spVal[2]; | |
224 | |
225 my $tmp = 0; | |
226 foreach(@spVal){ | |
227 if($sum != 0){ | |
228 $tmp = $_ * 100 / $sum; | |
229 }else{ | |
230 $tmp = 0; | |
231 } | |
232 push(@relVal,$tmp); | |
233 } | |
234 | |
235 $ovHash{"01"} = $relVal[3]; $ovHash{"10"} = $relVal[3]; | |
236 $ovHash{"02"} = $relVal[4]; $ovHash{"20"} = $relVal[4]; | |
237 $ovHash{"21"} = $relVal[5]; $ovHash{"12"} = $relVal[5]; | |
238 | |
239 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco="; | |
240 # change color position in the google output string corresponding to the highest value | |
241 foreach my $k( sort {$sort{$b}<=>$sort{$a}} keys %sort) { | |
242 $url .= $col[$k] . ","; | |
243 push(@store, $k); | |
244 } | |
245 chop($url); | |
246 | |
247 $url .= "&cht=v&chd=t:"; | |
248 # sort node values in the right order | |
249 for(my $i = 0 ; $i < @relVal - 4 ; $i++){ | |
250 #print $i . "\t" . $store[$i] . "\t" . $relVal[$store[$i]] . "\n"; | |
251 $url .= sprintf("%.1f", $relVal[$store[$i]]) . ","; | |
252 } | |
253 # sort intersection values in the right order | |
254 my $tStr0 = $store[0] . $store[1]; | |
255 my $tStr1 = $store[0] . $store[2]; | |
256 my $tStr2 = $store[1] . $store[2]; | |
257 $url .= sprintf("%.1f", $ovHash{$tStr0}) . "," . sprintf("%.1f", $ovHash{$tStr1}) . "," . sprintf("%.1f", $ovHash{$tStr2}) . ","; | |
258 $url .= "0.0"; | |
259 $url .= "&chf=bg,s,e0dede00"; | |
260 } | |
261 | |
262 | |
263 # original function without any node rotation. the order of the nodes is always the same | |
264 sub computeGoogleApiStr{ | |
265 my $frameSize = $_[0]; | |
266 my $values = $_[1]; | |
267 my @relVal = (); | |
268 my @spVal = split(" ", $values); | |
269 my $sum = $spVal[0] + $spVal[1] + $spVal[2]; | |
270 | |
271 foreach(@spVal){ | |
272 my $tmp = $_ * 100 / $sum; | |
273 push(@relVal,$tmp); | |
274 } | |
275 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . | |
276 #color | |
277 "&chco=FF6342,ADDE63,63C6DE" . | |
278 #"&chco=0000FF,0099FF,00FFFF" . | |
279 | |
280 "&cht=v&chd=t:"; | |
281 | |
282 for(my $i = 0 ; $i < @relVal - 1 ; $i++){ | |
283 $url .= sprintf("%.1f", $relVal[$i]) . ","; | |
284 } | |
285 $url .= sprintf("%.1f", $relVal[-1]); | |
286 | |
287 $url .= "&chf=bg,s,e0dede00"; | |
288 } | |
289 | |
290 | |
291 sub getCorrectedFrameSize{ | |
292 my $values = $_[0]; | |
293 my @spVal = split(" ", $values); | |
294 my $sum = $spVal[0] + $spVal[1] + $spVal[2]; | |
295 my $frame = lookupPixelSQRT($sum); | |
296 | |
297 # find maxValue position ([0] - [2]) | |
298 my $arrPos = getMaxPos($spVal[0], $spVal[1], $spVal[2]); | |
299 my $addOver= -1; | |
300 my $addNode= -1; | |
301 my $addSum = -1; | |
302 | |
303 if($arrPos == 0){ | |
304 $addNode = $spVal[1] + $spVal[2]; | |
305 $addOver = $spVal[3] + $spVal[4]; | |
306 }elsif($arrPos == 1){ | |
307 $addNode = $spVal[0] + $spVal[2]; | |
308 $addOver = $spVal[3] + $spVal[5]; | |
309 }elsif($arrPos == 2){ | |
310 $addNode = $spVal[0] + $spVal[1]; | |
311 $addOver = $spVal[4] + $spVal[5]; | |
312 } | |
313 # if the 2 of 3 nodes have no overlap to the largest one, than the complete value | |
314 # will be used to compute a frame and add this to the existing frame | |
315 $addSum = $addNode - $addOver; | |
316 | |
317 my $addFrame = lookupPixelSQRT($addSum); | |
318 my $sumFrame = $frame + $addFrame; | |
319 return $sumFrame; | |
320 } | |
321 | |
322 | |
323 sub getMaxPos{ | |
324 my $pos = -1; | |
325 if( ($_[0] >= $_[1]) && ($_[0] >= $_[2]) ){ | |
326 $pos = 0; | |
327 }elsif( ($_[1] >= $_[0]) && ($_[1] >= $_[2]) ){ | |
328 $pos = 1; | |
329 }else{ | |
330 $pos = 2; | |
331 } | |
332 return $pos; | |
333 } | |
334 | |
335 | |
336 sub getFrameSize{ | |
337 my $values = $_[0]; | |
338 my @spVal = split(" ", $values); | |
339 my $sum = $spVal[0] + $spVal[1] + $spVal[2]; | |
340 my $frame = lookupPixel($sum); | |
341 return $frame; | |
342 } | |
343 | |
344 | |
345 | |
346 sub clusterVennBottomUp{ | |
347 # transform $modifiedInput into datastructure | |
348 # container => [deep]{parent}{child} | |
349 my @container = (); | |
350 my @containerSpecial = (); | |
351 my @nodeValues = (); | |
352 my $maxDeep = 0; | |
353 my %helperHash = (); | |
354 my %specialMatrixAll = (); | |
355 | |
356 foreach(@{$modifiedInput}){ | |
357 my @tmpArr = split('\t', $_); | |
358 my @path = split(';' , $tmpArr[0]); | |
359 my $deep = @path - 1; | |
360 | |
361 if(($deep - 1) >= 0){ | |
362 $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1]; | |
363 $nodeValues[$deep]{$path[-2]}{$path[-1]} = "f"; | |
364 }else{ | |
365 $container[$deep]{"no"}{$path[-1]} = $tmpArr[1]; | |
366 } | |
367 } | |
368 # start computation from the deepest path to the root node | |
369 for(my $i = (@container-1) ; $i >= 0 ; $i--){ | |
370 while ( my($key, $value) = each %{$container[$i]} ){ | |
371 # update all predecessor nodes | |
372 while ( my($keyUp, $valueUp) = each %helperHash ){ | |
373 if(exists $container[$i]{$key}{$keyUp}){ | |
374 $container[$i]{$key}{$keyUp} = $valueUp; | |
375 # compute special value by decompose venn's and add special value | |
376 $containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}}); | |
377 } | |
378 } | |
379 # group all nodes which has the same predecessor id and sum up the values | |
380 while ( my($key2, $value2) = each %{$container[$i]{$key}} ){ | |
381 if(exists $helperHash{$key}){ | |
382 $helperHash{$key} = addValues($helperHash{$key}, $value2); | |
383 #push(@{$specialMatrixAll{$key}}, $value2); | |
384 #print $key . "\t" . $value2 . "\n"; | |
385 }else{ | |
386 $helperHash{$key} = $value2; | |
387 } | |
388 #print $key . "\t" . $value2 . "\n"; | |
389 push(@{$specialMatrixAll{$key}}, $value2); | |
390 } | |
391 } | |
392 } | |
393 return \@container, \@containerSpecial; | |
394 } | |
395 | |
396 | |
397 sub vennCongruousness{ | |
398 my $inSpecMatrix = $_[0]; | |
399 my $numOfSets = @numberOfSets - 1; | |
400 my @arrVal = (); my @matrix = (); | |
401 my @sum = (); | |
402 my $numVenn = 0; | |
403 my %actSet = (); | |
404 my %actOvp = (); | |
405 $actSet{"result"} = 0; | |
406 $actOvp{"result"} = 0; | |
407 | |
408 # (step 1) - sum up rows | |
409 foreach (@{$inSpecMatrix}){ | |
410 @arrVal = split(" ", $_); | |
411 $sum[0] += $arrVal[0]; $sum[1] += $arrVal[1]; $sum[2] += $arrVal[2]; | |
412 $sum[3] += $arrVal[3]; $sum[4] += $arrVal[4]; $sum[5] += $arrVal[5]; | |
413 | |
414 if($arrVal[0] > 0){ | |
415 if(!(exists $actSet{1})){ | |
416 $actSet{1} = 1; | |
417 $actSet{"result"} += 1; | |
418 } | |
419 } | |
420 if($arrVal[1] > 0){ | |
421 if(!(exists $actSet{2})){ | |
422 $actSet{2} = 1; | |
423 $actSet{"result"} += 1; | |
424 } | |
425 } | |
426 if($arrVal[2] > 0){ | |
427 if(!(exists $actSet{3})){ | |
428 $actSet{3} = 1; | |
429 $actSet{"result"} += 1; | |
430 } | |
431 } | |
432 if($arrVal[3] > 0){ | |
433 if(!(exists $actOvp{1})){ | |
434 $actOvp{1} = 1; | |
435 $actOvp{"result"} += 1; | |
436 } | |
437 } | |
438 if($arrVal[4] > 0){ | |
439 if(!(exists $actOvp{2})){ | |
440 $actOvp{2} = 1; | |
441 $actOvp{"result"} += 1; | |
442 } | |
443 } | |
444 if($arrVal[5] > 0){ | |
445 if(!(exists $actOvp{3})){ | |
446 $actOvp{3} = 1; | |
447 $actOvp{"result"} += 1; | |
448 } | |
449 } | |
450 } | |
451 | |
452 # (step 2) - calc ratios (-1) | |
453 my $i = 0; | |
454 foreach (@{$inSpecMatrix}){ | |
455 @arrVal = split(" ", $_); | |
456 for(my $j = 0 ; $j < @arrVal ; $j++){ # eventuell -1 da index von 0 - 6 anstatt 0 - 5 laeuft | |
457 # div zero ! | |
458 if($arrVal[$j] == 0){ | |
459 $matrix[$i][$j] = 0; | |
460 }else{ | |
461 #print $j . "\t" . $sum[$j] . " \t" . $arrVal[$j] . "\n"; | |
462 $matrix[$i][$j] = $sum[$j] / $arrVal[$j]; | |
463 } | |
464 } | |
465 $i++; | |
466 } | |
467 | |
468 $numVenn = $i; | |
469 # (step 3) - sum up data set ratios d1-d3 | |
470 @sum = (); | |
471 for(my $j = 0 ; $j < @matrix; $j++){ | |
472 $sum[0] += $matrix[$j][0]; $sum[1] += $matrix[$j][1]; $sum[2] += $matrix[$j][2]; | |
473 $sum[3] += $matrix[$j][3]; $sum[4] += $matrix[$j][4]; $sum[5] += $matrix[$j][5]; | |
474 } | |
475 # (step 4) - calc ratios -> max(d_i, #{V}) / min(d_i, #{V}) | |
476 my @condensedM = (); my $max = 0; my $min = 0; | |
477 for(my $j = 0 ; $j < @sum ; $j++){ | |
478 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]]; | |
479 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]]; | |
480 | |
481 if($min == 0){ | |
482 $sum[$j] = 0; | |
483 }else{ | |
484 #$sum[$j] = $max / $min; | |
485 $sum[$j] = $sum[$j] / $numVenn; | |
486 } | |
487 #print "-> " . $j . "\t" . $sum[$j] . "\t" . $max . "\t" . $min . "\n"; | |
488 } | |
489 # (step 5) - normalize values between zero and one -> [0..1] | |
490 for(my $j = 0 ; $j < @sum ; $j++){ | |
491 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]]; | |
492 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]]; | |
493 | |
494 if($max == 0){ | |
495 $sum[$j] = 0; | |
496 }else{ | |
497 $sum[$j] = $min / $max; | |
498 } | |
499 #print "=> " . $j . "\t" . $sum[$j] . "\t" . $min . "\t" . $max . "\n"; | |
500 } | |
501 # (step 6) - combine all decomposed values and create only one value | |
502 # case a: only one dataset -> $numOfSets == 1 | |
503 if($numOfSets == 1){ | |
504 #print "res: " . $sum[0] . "\n"; | |
505 return $sum[0]; | |
506 }elsif($numOfSets == 2){ | |
507 # evtl fallunteruntescheiung | |
508 print "sum1: " . $sum[0] . "\t" . "sum2: " . $sum[1] . "\t" . "ovp1-2: " . $sum[3] . "\t" . "sets: " . $actSet{"result"} . "\t" . "ovp: " . $actOvp{"result"} . "\n"; | |
509 if($actOvp{"result"} == 0){ | |
510 my $t = ((($sum[0] + $sum[1]) / $actSet{"result"}) ); | |
511 #print "res2 " . $t . " ***\n"; | |
512 return ((($sum[0] + $sum[1]) / $actSet{"result"}) ); | |
513 }else{ | |
514 print "foobar\n"; | |
515 my $t = ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) ); | |
516 #print "res2* " . $t . " ***\n"; | |
517 return ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) ); | |
518 } | |
519 }elsif($numOfSets == 3){ | |
520 #print $sum[0] . "\t" . $sum[1] . "\t" . $sum[2] . "\t" . $sum[3] . "\t" . $sum[4] . "\t" . $sum[5] . "\n"; | |
521 #print $actSet{"result"} . "\t" . $actOvp{"result"} . "\n"; | |
522 #return ((((($sum[0] + $sum[1] + $sum[2]) / $numOfSets) + (($sum[3] + $sum[4] + $sum[5]) / $numOfSets) ) / 2) ); | |
523 | |
524 if($actOvp{"result"} == 0){ | |
525 my $t = (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}); | |
526 #print ">>>>>>> " . $t. "\n"; | |
527 return (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}); | |
528 }else{ | |
529 my $t = ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2)); | |
530 #print ">>>>>>> " . $t. "\n"; | |
531 return ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2)); | |
532 } | |
533 }else{ | |
534 return -1; | |
535 } | |
536 } | |
537 | |
538 | |
539 # save version of function clusterVennBottomUp() | |
540 #sub clusterVennBottomUp{ | |
541 # # transform $modifiedInput into datastructure | |
542 # # container => [deep]{parent}{child} | |
543 # my @container = (); | |
544 # my $maxDeep = 0; | |
545 # my %helperHash = (); | |
546 # | |
547 # foreach(@{$modifiedInput}){ | |
548 # my @tmpArr = split('\t', $_); | |
549 # my @path = split(';' , $tmpArr[0]); | |
550 # my $deep = @path - 1; | |
551 # | |
552 # if(($deep - 1) >= 0){ | |
553 # $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1]; | |
554 # }else{ | |
555 # $container[$deep]{"no"}{$path[-1]} = $tmpArr[1]; | |
556 # } | |
557 # } | |
558 # # start computation from the deepest path to the root node | |
559 # for(my $i = (@container-1) ; $i >= 0 ; $i--){ | |
560 # while ( my($key, $value) = each %{$container[$i]} ){ | |
561 # # update all predecessor nodes | |
562 # while ( my($keyUp, $valueUp) = each %helperHash ){ | |
563 # if(exists $container[$i]{$key}{$keyUp}){ | |
564 # $container[$i]{$key}{$keyUp} = $valueUp; | |
565 # } | |
566 # } | |
567 # # group all nodes which has the same predecessor id and sum up the values | |
568 # while ( my($key2, $value2) = each %{$container[$i]{$key}} ){ | |
569 # if(exists $helperHash{$key}){ | |
570 # $helperHash{$key} = addValues($helperHash{$key}, $value2); | |
571 # }else{ | |
572 # $helperHash{$key} = $value2; | |
573 # } | |
574 # } | |
575 # } | |
576 # } | |
577 # return \@container; | |
578 #} | |
579 | |
580 | |
581 | |
582 sub addValues{ | |
583 my $val1 = $_[0]; | |
584 my $val2 = $_[1]; | |
585 | |
586 my @sV1 = split(" ", $val1); | |
587 my @sV2 = split(" ", $val2); | |
588 | |
589 my $tmp = $sV1[0] + $sV2[0]; | |
590 my $out = $tmp; | |
591 | |
592 for(my $i = 1 ; $i < @sV1 ; $i++){ | |
593 $tmp = $sV1[$i] + $sV2[$i]; | |
594 $out .= " " . $tmp; | |
595 } | |
596 return $out; | |
597 } | |
598 | |
599 | |
600 | |
601 # detect non leaf nodes and remove the values | |
602 # works on @input_file !!! | |
603 # this version works only with 3 depths! | |
604 sub detectNonLeafs{ | |
605 my %recursiveValues = (); | |
606 my @modifiedFile = (); | |
607 my $convertedPath = ""; | |
608 | |
609 my @additionalNetwork = (); | |
610 | |
611 # read last line | |
612 my @tmpArr1 = split('\t',$input_file[($#input_file)],2); | |
613 # -2 path direction from reward instead from the beginning. (-1 leaf,child , -2 parent,inner node) | |
614 my $parent1 = getId($tmpArr1[0],-2); | |
615 my $child1 = getId($tmpArr1[0],-1); | |
616 my $deep1 = getPathDeep($tmpArr1[0]); | |
617 my $parent2 = ""; | |
618 my $child2 = ""; | |
619 my $deep2 = 0; | |
620 | |
621 | |
622 # if "if-statement is true, only root node exists" | |
623 my $outStr = ""; | |
624 if($parent1 == -1){ | |
625 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); | |
626 push(@modifiedFile, $outStr); | |
627 }else{ | |
628 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); | |
629 push(@modifiedFile, $outStr); | |
630 | |
631 for(my $i = (@input_file-2) ; $i >= 0 ; $i--){ | |
632 @tmpArr1 = split('\t',$input_file[$i],2); | |
633 $parent2 = getId($tmpArr1[0],-2); | |
634 $child2 = getId($tmpArr1[0],-1); | |
635 $deep2 = getPathDeep($tmpArr1[0]); | |
636 | |
637 #print $parent2 . "\t" . $child2 . "\n"; | |
638 | |
639 # if eq true -> new leaf | |
640 if($parent2 eq $parent1){ | |
641 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); | |
642 push(@modifiedFile, $outStr); | |
643 }elsif($parent1 eq $child2){ | |
644 $outStr = convertPath($tmpArr1[0]) . "\t" . "undef"; | |
645 push(@modifiedFile, $outStr); | |
646 | |
647 my @check = split('\t', $tmpArr1[1]); | |
648 my $tSum = 0; | |
649 foreach(@check){ | |
650 $tSum += $_; | |
651 } | |
652 if(($onlyLeafs eq "off") && ($tSum > 0)){ | |
653 $outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]); | |
654 push(@modifiedFile, $outStr); | |
655 $outStr = $child2 . " pp " . "not_assigned_" . $child2; | |
656 push(@network, $outStr); | |
657 } | |
658 }else{ | |
659 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]); | |
660 push(@modifiedFile, $outStr); | |
661 } | |
662 | |
663 if($parent1 == -1){ | |
664 push(@modifiedFile, convertPath($tmpArr1[0])); | |
665 last; | |
666 } | |
667 $parent1 = $parent2; | |
668 $child1 = $child2; | |
669 $deep1 = $deep2; | |
670 } | |
671 } | |
672 # store @additionalNetwork in .sif file!!! at this point, the sif file exists! | |
673 # it is stored into @network container. this container is globel defined! | |
674 | |
675 return \@modifiedFile; | |
676 } | |
677 | |
678 | |
679 # helper function for detectNonLeafs | |
680 sub getPathDeep{ | |
681 my $inPath = $_[0]; | |
682 my @deep = split(';', $inPath); | |
683 my $size = $#deep; | |
684 return $size; | |
685 } | |
686 | |
687 sub convertPath{ | |
688 my $inString = $_[0]; | |
689 $inString =~ s/"//g; | |
690 $inString =~ s/\s+/_/g; | |
691 return $inString; | |
692 } | |
693 | |
694 sub getId{ | |
695 my $lineToParse = $_[0]; | |
696 my $idPos = $_[1]; | |
697 my $stringId = ""; | |
698 my @path = (); | |
699 | |
700 $lineToParse =~ s/"//g; | |
701 $lineToParse =~ s/\s+/_/g; | |
702 @path = split(';',$lineToParse); | |
703 my $num = @path; | |
704 | |
705 if(($num + $idPos) < 0){ | |
706 return -1; | |
707 }else{ | |
708 return $path[$idPos]; | |
709 } | |
710 } | |
711 | |
712 sub computeLeafValues{ | |
713 my $meganValues = $_[0]; | |
714 my @rawValues = split('\t', $meganValues); | |
715 my @nodeRelVal = (); | |
716 | |
717 my $outValues = $rawValues[0] . " " . $rawValues[1] . " " . $rawValues[2]; | |
718 | |
719 if($rawValues[0] <= $rawValues[1]){ | |
720 $outValues .= " " . $rawValues[0]; | |
721 }else{ | |
722 $outValues .= " " . $rawValues[1]; | |
723 } | |
724 if($rawValues[0] <= $rawValues[2]){ | |
725 $outValues .= " " . $rawValues[0]; | |
726 }else{ | |
727 $outValues .= " " . $rawValues[2]; | |
728 } | |
729 if($rawValues[1] <= $rawValues[2]){ | |
730 $outValues .= " " . $rawValues[1]; | |
731 }else{ | |
732 $outValues .= " " . $rawValues[2]; | |
733 } | |
734 #my ($min, $max) = minmax @rawValues; | |
735 my $min = 0; | |
736 $outValues .= " " . $min; | |
737 | |
738 return $outValues; | |
739 } | |
740 # ----------------------------------------------------------------------------- | |
741 | |
742 | |
743 # compute network (.sif) | |
744 sub addToNetwork{ | |
745 my $inLine = $_[0]; | |
746 my @splitInLine = split('\t',$inLine); | |
747 # remove ' " ' from line | |
748 $splitInLine[0] =~ s/"//g; | |
749 $splitInLine[0] =~ s/\s+/_/g; | |
750 my @elements = split(';' ,$splitInLine[0]); | |
751 | |
752 if(@elements > 1){ | |
753 my $outString = $elements[-2] . " pp " . $elements[-1]; | |
754 push(@network, $outString); | |
755 } | |
756 } | |
757 | |
758 | |
759 # store network in .sif file | |
760 sub storeNetwork{ | |
761 # test | |
762 my $tmpFileName = $out_network; | |
763 | |
764 #my $tmpFileName = "./network.sif"; | |
765 open(FILE , ">$tmpFileName") || die "File can't be written - \"sif - File\"!\n"; | |
766 print FILE join("\n", @network) . "\n"; | |
767 close(FILE); | |
768 } | |
769 | |
770 | |
771 # --------------------------------------------------------------------------------------------- | |
772 # two different lookup-tables are available! | |
773 # lookupPixel() => static ; lookupPixelSQRT() => dynamic | |
774 # | |
775 # lookup absolute node-size to pixel (frame-size for venn-diagram) | |
776 sub lookupPixel{ | |
777 my $query = $_[0]; | |
778 | |
779 if($query < 10){ | |
780 return 30; | |
781 }elsif($query < 100){ | |
782 return 40; | |
783 }elsif($query < 1000){ | |
784 return 50; | |
785 }elsif($query < 10000){ | |
786 return 60; | |
787 }elsif($query < 100000){ | |
788 return 80; | |
789 }elsif($query < 1000000){ | |
790 return 100; | |
791 }elsif($query < 10000000){ | |
792 return 140; | |
793 }elsif($query < 20000000){ | |
794 return 180; | |
795 }elsif($query < 30000000){ | |
796 return 220; | |
797 }else{ | |
798 return 250; | |
799 } | |
800 } | |
801 | |
802 # lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used! | |
803 sub lookupPixelSQRT{ | |
804 | |
805 if ($transFnc == 0) { | |
806 return int(($_[0] ** (1/(1.6))) * 1.8 + 8); # 3,000 datapoints in sum | |
807 }elsif($transFnc == 1){ | |
808 return int(($_[0] ** (1/(2.1))) * 1.8 + 8); # 30,000 datapoints in sum | |
809 }elsif($transFnc == 2){ | |
810 return int(($_[0] ** (1/(2.6))) * 1.8 + 8); # 300,000 datapoints in sum | |
811 }elsif($transFnc == 3){ | |
812 return int(($_[0] ** (1/(3.1))) * 1.8 + 8); # 3,000,000 datapoints in sum | |
813 }elsif($transFnc == 4){ | |
814 return int(($_[0] ** (1/(3.7))) * 1.8 + 8); # 30,000,000 datapoints in sum | |
815 }elsif($transFnc == 5){ | |
816 return int(($_[0] ** (1/(4))) * 1.8 + 8); # 300,000,000 datapoints in sum | |
817 }elsif($transFnc == 6){ | |
818 return int(($_[0] ** (1/(4.7))) * 1.8 + 8); # 3,000,000,000 datapoints in sum | |
819 } | |
820 | |
821 #return int(($_[0] ** (1/(3.3))) * 1.8 + 30); # test version for small and large datasets? | |
822 #return int(($_[0] ** (1/(3.3))) * 1.8 + 5); # test version for small and large datasets? | |
823 #return int(($_[0] ** (1/(4))) * 1.8 + 8); # test version for small and large datasets? | |
824 #return int(($_[0] ** (1/6)) * 12); # old version this version is good for large datasets | |
825 } | |
826 | |
827 | |
828 | |
829 | |
830 | |
831 | |
832 | |
833 |