comparison coVennTree/coVennTree.pl @ 0:745aede829e9 draft default tip

Imported from capsule None
author steffen
date Fri, 30 Jan 2015 09:55:45 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:745aede829e9
1 #!/usr/bin/perl
2 use strict;
3 use File::Basename;
4 use List::MoreUtils qw( minmax );
5
6 # --------------------------------------------------------------------------------------------------
7 # author: steffen lott
8 # mail: steffen.lott@uni-freiburg.de
9 # date: 06-10-2014
10 # version: 1.6
11 #
12 # description:
13 # The tool converts an output from MEGAN in a special network which can visuallized with
14 # cytoscape. Gaper produces two files, the first one contains the network and the second one
15 # describes the attributes of the network.
16 # --------------------------------------------------------------------------------------------------
17
18 # return version number
19 if (@ARGV == 0) {
20 print "CoVennTree-Version 1.6\n";
21 print "COMMAND\n";
22 print "coventree argv0 argv1 argv2 argv3 argv4\n";
23 print "--------------\n";
24 print "argv0 = input file\n";
25 print "argv1 = color mode [1,4]\n";
26 print "argv2 = transformation function [1,7]\n";
27 print "argv3 = only leaf information => 0 ; all information => 1\n";
28 print "argv4 = output file name network\n";
29 print "argv5 = output file name attributes\n";
30 exit;
31 }
32
33
34
35
36 # container to represent the network
37 my @network = ();
38
39
40
41 # 0 PARAMETER_______________
42 # read argument from command-line
43 # important: DSV -> taxon-path, count(s) -> assigned -> tab
44 my $megan_file = $ARGV[0];
45
46
47 # 1 PARAMETER_______________
48 my $colorMode;
49 # color mode for venn-diagrams 0,1,2,3,4
50 if(defined $ARGV[1]){
51 $colorMode = $ARGV[1];
52 }else{
53 $colorMode = 3;
54 }
55
56
57 # 2 PARAMETER_______________
58 # 2 different transformations functions
59 my $transFnc = "";
60 if(defined $ARGV[2]){ # small datasets
61 $transFnc = $ARGV[2];
62 }else{
63 $transFnc = 1;
64 }
65
66
67 # 3 PARAMETER_______________
68 # the user can switch between "only leaf information"
69 # or the complete tree information. the last one takes also the not assigned reads
70 # and creates artificial nodes to keep this number
71 my $onlyLeafs;
72 if(defined $ARGV[3]){
73 if($ARGV[3] == 0){
74 $onlyLeafs = "on";
75 }elsif($ARGV[3] == 1){
76 $onlyLeafs = "off";
77 }
78 }else{ # all information will be used! not assigned and assigned
79 $onlyLeafs = "off";
80 }
81
82 # 4 PARAMETER_______________
83 # output -> network
84 my $out_network = $ARGV[4];
85
86 # 5 PARAMETER_______________
87 # output -> attributes
88 my $out_attributes = $ARGV[5];
89
90
91
92 # check the input format of the file. only a file with exactly three datasets are excepted. the other one will fill up with zeros
93
94
95 # read-in MEGAN-file
96 # if #{data-sets} = 1 -> no heade line
97 # if #{data-sets} > 1 -> heade line " #Datasets set1 set2 ..."
98 open(inFile , "<$megan_file") || die "File not found - \"Path-File\"!\n";
99 my @pairIds = ();
100 my $header = "";
101 my @input_file = ();
102 my @numberOfSets = ();
103
104 while(<inFile>){
105 chomp($_);
106 if($_ =~ /^#/){
107 $header = $_;
108 @numberOfSets = split("\t", $_);
109 }else{
110 #print @numberOfSets . "\n";
111 # check the number of datasets are included
112 if(@numberOfSets == 0 || @numberOfSets == 1 || @numberOfSets > 4){ # no set is in the file
113 print "Error: File doesn't contain any dataset or contain more than three!";
114 exit;
115 }elsif(@numberOfSets == 2){ # only one set is in the file -> add 2x zeros
116 $_ .= "\t" . 0 . "\t" . 0;
117 }elsif(@numberOfSets == 3){ # only two sets are in the file -> add 1x zeros
118 $_ .= "\t" . 0;
119 }
120
121 addToNetwork($_);
122 push(@input_file, $_);
123 }
124 }
125 close(inFile);
126
127
128 # --------------------------------------------------------------------------------------------------------------
129 # --------------------------------------------------------------------------------------------------------------
130 # (1) PREPROCESSING: detect all leaf nodes
131 my $modifiedInput = detectNonLeafs();
132
133 # (2) MAIN COMPUTATION: compute deep by deep (path deep ex. root;Viruses; => deep 2)
134 my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();
135
136 # (3) VENN-END-PREPERATION: sum up all single values (d1-d3), transform abs values into
137 my $vennToStore = vennForCytoscape($vennClusterOut, $specialNumberOut);
138
139 # (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the attributes
140 storeNetwork();
141 store2FileVenn($vennToStore);
142 # --------------------------------------------------------------------------------------------------------------
143 # --------------------------------------------------------------------------------------------------------------
144
145
146
147
148 sub store2FileVenn{
149 my $outVenn = $_[0];
150 # test
151 my $tmpFileName = $out_attributes;
152
153 #my $tmpFileName = "./network.venn";
154 open(FILE , ">$tmpFileName") || die "File can't be written - \"venn - File\"!\n";
155 print FILE join("\n", @{$outVenn}) . "\n";
156 close(FILE);
157 }
158
159
160 sub vennForCytoscape{
161 my $vennCluster = $_[0];
162 my $specialNum = $_[1];
163 my $specNformat = 0;
164 my @out = ();
165 # datastructure $vennCluster=> vennCluster[]{}{} => values
166 #delete $vennCluster->[0]{"no"};
167 my $frameSize = 0;
168 my $values = 0;
169 my $googleURL = "";
170 my $outStr = "";
171
172 for(my $i = 0 ; $i < @{$vennCluster}; $i++){
173 while ( my($key, $value) = each %{$vennCluster->[$i]} ){
174 while ( my($key2, $value2) = each %{$vennCluster->[$i]{$key}} ){
175 $values = $vennCluster->[$i]{$key}{$key2};
176 $frameSize = getCorrectedFrameSize($values);
177
178 if(defined $specialNum->[$i]{$key}{$key2}){
179 $specNformat = $key2 . "[" . sprintf("%.3f", $specialNum->[$i]{$key}{$key2}) . "]";
180 }else{
181 $specNformat = $key2;
182 }
183
184 # old version, this version works pretty well
185 #$frameSize = getFrameSize($values);
186 $googleURL = computeGoogleApiStrRotation($frameSize,$values,$colorMode);
187 $outStr = $key2 . "\t" . $googleURL . "\t" . $specNformat . "\t" . $values;
188 push(@out, $outStr);
189 }
190 }
191 }
192 return \@out;
193 }
194
195
196 # this function keep the biggest node in the lowest depth,...
197 sub computeGoogleApiStrRotation{
198 my $frameSize = $_[0];
199 my $values = $_[1];
200 my $colMode = $_[2];
201 my @relVal = ();
202 my @col = ();
203 my %sort = ();
204 my @store = ();
205 my %ovHash = ();
206 my @storeOldPos = ();
207 my @spVal = split(" ", $values);
208 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
209
210 # user color-mode
211 if($colMode == 0){
212 $col[0] = "18A3F2"; $col[1] = "FA0800"; $col[2] = "FFF905";
213 }elsif($colMode == 1){
214 $col[0] = "FF2A00"; $col[1] = "9CFF00"; $col[2] = "00CCFF";
215 }elsif($colMode == 2){
216 $col[0] = "B4FF00"; $col[1] = "FF00C6"; $col[2] = "00AEFF";
217 }elsif($colMode == 3){
218 $col[0] = "82FF00"; $col[1] = "7E00FF"; $col[2] = "FF003B";
219 }elsif($colMode == 4){
220 $col[0] = "1A1A1A"; $col[1] = "8A8A8A"; $col[2] = "C7C7C7";
221 }
222
223 $sort{"0"} = $spVal[0]; $sort{"1"} = $spVal[1]; $sort{"2"} = $spVal[2];
224
225 my $tmp = 0;
226 foreach(@spVal){
227 if($sum != 0){
228 $tmp = $_ * 100 / $sum;
229 }else{
230 $tmp = 0;
231 }
232 push(@relVal,$tmp);
233 }
234
235 $ovHash{"01"} = $relVal[3]; $ovHash{"10"} = $relVal[3];
236 $ovHash{"02"} = $relVal[4]; $ovHash{"20"} = $relVal[4];
237 $ovHash{"21"} = $relVal[5]; $ovHash{"12"} = $relVal[5];
238
239 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco=";
240 # change color position in the google output string corresponding to the highest value
241 foreach my $k( sort {$sort{$b}<=>$sort{$a}} keys %sort) {
242 $url .= $col[$k] . ",";
243 push(@store, $k);
244 }
245 chop($url);
246
247 $url .= "&cht=v&chd=t:";
248 # sort node values in the right order
249 for(my $i = 0 ; $i < @relVal - 4 ; $i++){
250 #print $i . "\t" . $store[$i] . "\t" . $relVal[$store[$i]] . "\n";
251 $url .= sprintf("%.1f", $relVal[$store[$i]]) . ",";
252 }
253 # sort intersection values in the right order
254 my $tStr0 = $store[0] . $store[1];
255 my $tStr1 = $store[0] . $store[2];
256 my $tStr2 = $store[1] . $store[2];
257 $url .= sprintf("%.1f", $ovHash{$tStr0}) . "," . sprintf("%.1f", $ovHash{$tStr1}) . "," . sprintf("%.1f", $ovHash{$tStr2}) . ",";
258 $url .= "0.0";
259 $url .= "&chf=bg,s,e0dede00";
260 }
261
262
263 # original function without any node rotation. the order of the nodes is always the same
264 sub computeGoogleApiStr{
265 my $frameSize = $_[0];
266 my $values = $_[1];
267 my @relVal = ();
268 my @spVal = split(" ", $values);
269 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
270
271 foreach(@spVal){
272 my $tmp = $_ * 100 / $sum;
273 push(@relVal,$tmp);
274 }
275 my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize .
276 #color
277 "&chco=FF6342,ADDE63,63C6DE" .
278 #"&chco=0000FF,0099FF,00FFFF" .
279
280 "&cht=v&chd=t:";
281
282 for(my $i = 0 ; $i < @relVal - 1 ; $i++){
283 $url .= sprintf("%.1f", $relVal[$i]) . ",";
284 }
285 $url .= sprintf("%.1f", $relVal[-1]);
286
287 $url .= "&chf=bg,s,e0dede00";
288 }
289
290
291 sub getCorrectedFrameSize{
292 my $values = $_[0];
293 my @spVal = split(" ", $values);
294 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
295 my $frame = lookupPixelSQRT($sum);
296
297 # find maxValue position ([0] - [2])
298 my $arrPos = getMaxPos($spVal[0], $spVal[1], $spVal[2]);
299 my $addOver= -1;
300 my $addNode= -1;
301 my $addSum = -1;
302
303 if($arrPos == 0){
304 $addNode = $spVal[1] + $spVal[2];
305 $addOver = $spVal[3] + $spVal[4];
306 }elsif($arrPos == 1){
307 $addNode = $spVal[0] + $spVal[2];
308 $addOver = $spVal[3] + $spVal[5];
309 }elsif($arrPos == 2){
310 $addNode = $spVal[0] + $spVal[1];
311 $addOver = $spVal[4] + $spVal[5];
312 }
313 # if the 2 of 3 nodes have no overlap to the largest one, than the complete value
314 # will be used to compute a frame and add this to the existing frame
315 $addSum = $addNode - $addOver;
316
317 my $addFrame = lookupPixelSQRT($addSum);
318 my $sumFrame = $frame + $addFrame;
319 return $sumFrame;
320 }
321
322
323 sub getMaxPos{
324 my $pos = -1;
325 if( ($_[0] >= $_[1]) && ($_[0] >= $_[2]) ){
326 $pos = 0;
327 }elsif( ($_[1] >= $_[0]) && ($_[1] >= $_[2]) ){
328 $pos = 1;
329 }else{
330 $pos = 2;
331 }
332 return $pos;
333 }
334
335
336 sub getFrameSize{
337 my $values = $_[0];
338 my @spVal = split(" ", $values);
339 my $sum = $spVal[0] + $spVal[1] + $spVal[2];
340 my $frame = lookupPixel($sum);
341 return $frame;
342 }
343
344
345
346 sub clusterVennBottomUp{
347 # transform $modifiedInput into datastructure
348 # container => [deep]{parent}{child}
349 my @container = ();
350 my @containerSpecial = ();
351 my @nodeValues = ();
352 my $maxDeep = 0;
353 my %helperHash = ();
354 my %specialMatrixAll = ();
355
356 foreach(@{$modifiedInput}){
357 my @tmpArr = split('\t', $_);
358 my @path = split(';' , $tmpArr[0]);
359 my $deep = @path - 1;
360
361 if(($deep - 1) >= 0){
362 $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
363 $nodeValues[$deep]{$path[-2]}{$path[-1]} = "f";
364 }else{
365 $container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
366 }
367 }
368 # start computation from the deepest path to the root node
369 for(my $i = (@container-1) ; $i >= 0 ; $i--){
370 while ( my($key, $value) = each %{$container[$i]} ){
371 # update all predecessor nodes
372 while ( my($keyUp, $valueUp) = each %helperHash ){
373 if(exists $container[$i]{$key}{$keyUp}){
374 $container[$i]{$key}{$keyUp} = $valueUp;
375 # compute special value by decompose venn's and add special value
376 $containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}});
377 }
378 }
379 # group all nodes which has the same predecessor id and sum up the values
380 while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
381 if(exists $helperHash{$key}){
382 $helperHash{$key} = addValues($helperHash{$key}, $value2);
383 #push(@{$specialMatrixAll{$key}}, $value2);
384 #print $key . "\t" . $value2 . "\n";
385 }else{
386 $helperHash{$key} = $value2;
387 }
388 #print $key . "\t" . $value2 . "\n";
389 push(@{$specialMatrixAll{$key}}, $value2);
390 }
391 }
392 }
393 return \@container, \@containerSpecial;
394 }
395
396
397 sub vennCongruousness{
398 my $inSpecMatrix = $_[0];
399 my $numOfSets = @numberOfSets - 1;
400 my @arrVal = (); my @matrix = ();
401 my @sum = ();
402 my $numVenn = 0;
403 my %actSet = ();
404 my %actOvp = ();
405 $actSet{"result"} = 0;
406 $actOvp{"result"} = 0;
407
408 # (step 1) - sum up rows
409 foreach (@{$inSpecMatrix}){
410 @arrVal = split(" ", $_);
411 $sum[0] += $arrVal[0]; $sum[1] += $arrVal[1]; $sum[2] += $arrVal[2];
412 $sum[3] += $arrVal[3]; $sum[4] += $arrVal[4]; $sum[5] += $arrVal[5];
413
414 if($arrVal[0] > 0){
415 if(!(exists $actSet{1})){
416 $actSet{1} = 1;
417 $actSet{"result"} += 1;
418 }
419 }
420 if($arrVal[1] > 0){
421 if(!(exists $actSet{2})){
422 $actSet{2} = 1;
423 $actSet{"result"} += 1;
424 }
425 }
426 if($arrVal[2] > 0){
427 if(!(exists $actSet{3})){
428 $actSet{3} = 1;
429 $actSet{"result"} += 1;
430 }
431 }
432 if($arrVal[3] > 0){
433 if(!(exists $actOvp{1})){
434 $actOvp{1} = 1;
435 $actOvp{"result"} += 1;
436 }
437 }
438 if($arrVal[4] > 0){
439 if(!(exists $actOvp{2})){
440 $actOvp{2} = 1;
441 $actOvp{"result"} += 1;
442 }
443 }
444 if($arrVal[5] > 0){
445 if(!(exists $actOvp{3})){
446 $actOvp{3} = 1;
447 $actOvp{"result"} += 1;
448 }
449 }
450 }
451
452 # (step 2) - calc ratios (-1)
453 my $i = 0;
454 foreach (@{$inSpecMatrix}){
455 @arrVal = split(" ", $_);
456 for(my $j = 0 ; $j < @arrVal ; $j++){ # eventuell -1 da index von 0 - 6 anstatt 0 - 5 laeuft
457 # div zero !
458 if($arrVal[$j] == 0){
459 $matrix[$i][$j] = 0;
460 }else{
461 #print $j . "\t" . $sum[$j] . " \t" . $arrVal[$j] . "\n";
462 $matrix[$i][$j] = $sum[$j] / $arrVal[$j];
463 }
464 }
465 $i++;
466 }
467
468 $numVenn = $i;
469 # (step 3) - sum up data set ratios d1-d3
470 @sum = ();
471 for(my $j = 0 ; $j < @matrix; $j++){
472 $sum[0] += $matrix[$j][0]; $sum[1] += $matrix[$j][1]; $sum[2] += $matrix[$j][2];
473 $sum[3] += $matrix[$j][3]; $sum[4] += $matrix[$j][4]; $sum[5] += $matrix[$j][5];
474 }
475 # (step 4) - calc ratios -> max(d_i, #{V}) / min(d_i, #{V})
476 my @condensedM = (); my $max = 0; my $min = 0;
477 for(my $j = 0 ; $j < @sum ; $j++){
478 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]];
479 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]];
480
481 if($min == 0){
482 $sum[$j] = 0;
483 }else{
484 #$sum[$j] = $max / $min;
485 $sum[$j] = $sum[$j] / $numVenn;
486 }
487 #print "-> " . $j . "\t" . $sum[$j] . "\t" . $max . "\t" . $min . "\n";
488 }
489 # (step 5) - normalize values between zero and one -> [0..1]
490 for(my $j = 0 ; $j < @sum ; $j++){
491 $max = ($numVenn, $sum[$j])[$numVenn < $sum[$j]];
492 $min = ($numVenn, $sum[$j])[$numVenn > $sum[$j]];
493
494 if($max == 0){
495 $sum[$j] = 0;
496 }else{
497 $sum[$j] = $min / $max;
498 }
499 #print "=> " . $j . "\t" . $sum[$j] . "\t" . $min . "\t" . $max . "\n";
500 }
501 # (step 6) - combine all decomposed values and create only one value
502 # case a: only one dataset -> $numOfSets == 1
503 if($numOfSets == 1){
504 #print "res: " . $sum[0] . "\n";
505 return $sum[0];
506 }elsif($numOfSets == 2){
507 # evtl fallunteruntescheiung
508 print "sum1: " . $sum[0] . "\t" . "sum2: " . $sum[1] . "\t" . "ovp1-2: " . $sum[3] . "\t" . "sets: " . $actSet{"result"} . "\t" . "ovp: " . $actOvp{"result"} . "\n";
509 if($actOvp{"result"} == 0){
510 my $t = ((($sum[0] + $sum[1]) / $actSet{"result"}) );
511 #print "res2 " . $t . " ***\n";
512 return ((($sum[0] + $sum[1]) / $actSet{"result"}) );
513 }else{
514 print "foobar\n";
515 my $t = ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) );
516 #print "res2* " . $t . " ***\n";
517 return ((((($sum[0] + $sum[1]) / $actSet{"result"}) + $sum[3]) / 2) );
518 }
519 }elsif($numOfSets == 3){
520 #print $sum[0] . "\t" . $sum[1] . "\t" . $sum[2] . "\t" . $sum[3] . "\t" . $sum[4] . "\t" . $sum[5] . "\n";
521 #print $actSet{"result"} . "\t" . $actOvp{"result"} . "\n";
522 #return ((((($sum[0] + $sum[1] + $sum[2]) / $numOfSets) + (($sum[3] + $sum[4] + $sum[5]) / $numOfSets) ) / 2) );
523
524 if($actOvp{"result"} == 0){
525 my $t = (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"});
526 #print ">>>>>>> " . $t. "\n";
527 return (($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"});
528 }else{
529 my $t = ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2));
530 #print ">>>>>>> " . $t. "\n";
531 return ((((($sum[0] + $sum[1] + $sum[2]) / $actSet{"result"}) + (($sum[3] + $sum[4] + $sum[5]) / $actOvp{"result"}) ) / 2));
532 }
533 }else{
534 return -1;
535 }
536 }
537
538
539 # save version of function clusterVennBottomUp()
540 #sub clusterVennBottomUp{
541 # # transform $modifiedInput into datastructure
542 # # container => [deep]{parent}{child}
543 # my @container = ();
544 # my $maxDeep = 0;
545 # my %helperHash = ();
546 #
547 # foreach(@{$modifiedInput}){
548 # my @tmpArr = split('\t', $_);
549 # my @path = split(';' , $tmpArr[0]);
550 # my $deep = @path - 1;
551 #
552 # if(($deep - 1) >= 0){
553 # $container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
554 # }else{
555 # $container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
556 # }
557 # }
558 # # start computation from the deepest path to the root node
559 # for(my $i = (@container-1) ; $i >= 0 ; $i--){
560 # while ( my($key, $value) = each %{$container[$i]} ){
561 # # update all predecessor nodes
562 # while ( my($keyUp, $valueUp) = each %helperHash ){
563 # if(exists $container[$i]{$key}{$keyUp}){
564 # $container[$i]{$key}{$keyUp} = $valueUp;
565 # }
566 # }
567 # # group all nodes which has the same predecessor id and sum up the values
568 # while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
569 # if(exists $helperHash{$key}){
570 # $helperHash{$key} = addValues($helperHash{$key}, $value2);
571 # }else{
572 # $helperHash{$key} = $value2;
573 # }
574 # }
575 # }
576 # }
577 # return \@container;
578 #}
579
580
581
582 sub addValues{
583 my $val1 = $_[0];
584 my $val2 = $_[1];
585
586 my @sV1 = split(" ", $val1);
587 my @sV2 = split(" ", $val2);
588
589 my $tmp = $sV1[0] + $sV2[0];
590 my $out = $tmp;
591
592 for(my $i = 1 ; $i < @sV1 ; $i++){
593 $tmp = $sV1[$i] + $sV2[$i];
594 $out .= " " . $tmp;
595 }
596 return $out;
597 }
598
599
600
601 # detect non leaf nodes and remove the values
602 # works on @input_file !!!
603 # this version works only with 3 depths!
604 sub detectNonLeafs{
605 my %recursiveValues = ();
606 my @modifiedFile = ();
607 my $convertedPath = "";
608
609 my @additionalNetwork = ();
610
611 # read last line
612 my @tmpArr1 = split('\t',$input_file[($#input_file)],2);
613 # -2 path direction from reward instead from the beginning. (-1 leaf,child , -2 parent,inner node)
614 my $parent1 = getId($tmpArr1[0],-2);
615 my $child1 = getId($tmpArr1[0],-1);
616 my $deep1 = getPathDeep($tmpArr1[0]);
617 my $parent2 = "";
618 my $child2 = "";
619 my $deep2 = 0;
620
621
622 # if "if-statement is true, only root node exists"
623 my $outStr = "";
624 if($parent1 == -1){
625 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
626 push(@modifiedFile, $outStr);
627 }else{
628 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
629 push(@modifiedFile, $outStr);
630
631 for(my $i = (@input_file-2) ; $i >= 0 ; $i--){
632 @tmpArr1 = split('\t',$input_file[$i],2);
633 $parent2 = getId($tmpArr1[0],-2);
634 $child2 = getId($tmpArr1[0],-1);
635 $deep2 = getPathDeep($tmpArr1[0]);
636
637 #print $parent2 . "\t" . $child2 . "\n";
638
639 # if eq true -> new leaf
640 if($parent2 eq $parent1){
641 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
642 push(@modifiedFile, $outStr);
643 }elsif($parent1 eq $child2){
644 $outStr = convertPath($tmpArr1[0]) . "\t" . "undef";
645 push(@modifiedFile, $outStr);
646
647 my @check = split('\t', $tmpArr1[1]);
648 my $tSum = 0;
649 foreach(@check){
650 $tSum += $_;
651 }
652 if(($onlyLeafs eq "off") && ($tSum > 0)){
653 $outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]);
654 push(@modifiedFile, $outStr);
655 $outStr = $child2 . " pp " . "not_assigned_" . $child2;
656 push(@network, $outStr);
657 }
658 }else{
659 $outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
660 push(@modifiedFile, $outStr);
661 }
662
663 if($parent1 == -1){
664 push(@modifiedFile, convertPath($tmpArr1[0]));
665 last;
666 }
667 $parent1 = $parent2;
668 $child1 = $child2;
669 $deep1 = $deep2;
670 }
671 }
672 # store @additionalNetwork in .sif file!!! at this point, the sif file exists!
673 # it is stored into @network container. this container is globel defined!
674
675 return \@modifiedFile;
676 }
677
678
679 # helper function for detectNonLeafs
680 sub getPathDeep{
681 my $inPath = $_[0];
682 my @deep = split(';', $inPath);
683 my $size = $#deep;
684 return $size;
685 }
686
687 sub convertPath{
688 my $inString = $_[0];
689 $inString =~ s/"//g;
690 $inString =~ s/\s+/_/g;
691 return $inString;
692 }
693
694 sub getId{
695 my $lineToParse = $_[0];
696 my $idPos = $_[1];
697 my $stringId = "";
698 my @path = ();
699
700 $lineToParse =~ s/"//g;
701 $lineToParse =~ s/\s+/_/g;
702 @path = split(';',$lineToParse);
703 my $num = @path;
704
705 if(($num + $idPos) < 0){
706 return -1;
707 }else{
708 return $path[$idPos];
709 }
710 }
711
712 sub computeLeafValues{
713 my $meganValues = $_[0];
714 my @rawValues = split('\t', $meganValues);
715 my @nodeRelVal = ();
716
717 my $outValues = $rawValues[0] . " " . $rawValues[1] . " " . $rawValues[2];
718
719 if($rawValues[0] <= $rawValues[1]){
720 $outValues .= " " . $rawValues[0];
721 }else{
722 $outValues .= " " . $rawValues[1];
723 }
724 if($rawValues[0] <= $rawValues[2]){
725 $outValues .= " " . $rawValues[0];
726 }else{
727 $outValues .= " " . $rawValues[2];
728 }
729 if($rawValues[1] <= $rawValues[2]){
730 $outValues .= " " . $rawValues[1];
731 }else{
732 $outValues .= " " . $rawValues[2];
733 }
734 #my ($min, $max) = minmax @rawValues;
735 my $min = 0;
736 $outValues .= " " . $min;
737
738 return $outValues;
739 }
740 # -----------------------------------------------------------------------------
741
742
743 # compute network (.sif)
744 sub addToNetwork{
745 my $inLine = $_[0];
746 my @splitInLine = split('\t',$inLine);
747 # remove ' " ' from line
748 $splitInLine[0] =~ s/"//g;
749 $splitInLine[0] =~ s/\s+/_/g;
750 my @elements = split(';' ,$splitInLine[0]);
751
752 if(@elements > 1){
753 my $outString = $elements[-2] . " pp " . $elements[-1];
754 push(@network, $outString);
755 }
756 }
757
758
759 # store network in .sif file
760 sub storeNetwork{
761 # test
762 my $tmpFileName = $out_network;
763
764 #my $tmpFileName = "./network.sif";
765 open(FILE , ">$tmpFileName") || die "File can't be written - \"sif - File\"!\n";
766 print FILE join("\n", @network) . "\n";
767 close(FILE);
768 }
769
770
771 # ---------------------------------------------------------------------------------------------
772 # two different lookup-tables are available!
773 # lookupPixel() => static ; lookupPixelSQRT() => dynamic
774 #
775 # lookup absolute node-size to pixel (frame-size for venn-diagram)
776 sub lookupPixel{
777 my $query = $_[0];
778
779 if($query < 10){
780 return 30;
781 }elsif($query < 100){
782 return 40;
783 }elsif($query < 1000){
784 return 50;
785 }elsif($query < 10000){
786 return 60;
787 }elsif($query < 100000){
788 return 80;
789 }elsif($query < 1000000){
790 return 100;
791 }elsif($query < 10000000){
792 return 140;
793 }elsif($query < 20000000){
794 return 180;
795 }elsif($query < 30000000){
796 return 220;
797 }else{
798 return 250;
799 }
800 }
801
802 # lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used!
803 sub lookupPixelSQRT{
804
805 if ($transFnc == 0) {
806 return int(($_[0] ** (1/(1.6))) * 1.8 + 8); # 3,000 datapoints in sum
807 }elsif($transFnc == 1){
808 return int(($_[0] ** (1/(2.1))) * 1.8 + 8); # 30,000 datapoints in sum
809 }elsif($transFnc == 2){
810 return int(($_[0] ** (1/(2.6))) * 1.8 + 8); # 300,000 datapoints in sum
811 }elsif($transFnc == 3){
812 return int(($_[0] ** (1/(3.1))) * 1.8 + 8); # 3,000,000 datapoints in sum
813 }elsif($transFnc == 4){
814 return int(($_[0] ** (1/(3.7))) * 1.8 + 8); # 30,000,000 datapoints in sum
815 }elsif($transFnc == 5){
816 return int(($_[0] ** (1/(4))) * 1.8 + 8); # 300,000,000 datapoints in sum
817 }elsif($transFnc == 6){
818 return int(($_[0] ** (1/(4.7))) * 1.8 + 8); # 3,000,000,000 datapoints in sum
819 }
820
821 #return int(($_[0] ** (1/(3.3))) * 1.8 + 30); # test version for small and large datasets?
822 #return int(($_[0] ** (1/(3.3))) * 1.8 + 5); # test version for small and large datasets?
823 #return int(($_[0] ** (1/(4))) * 1.8 + 8); # test version for small and large datasets?
824 #return int(($_[0] ** (1/6)) * 12); # old version this version is good for large datasets
825 }
826
827
828
829
830
831
832
833