view coVennTree/coVennTree.pl @ 0:745aede829e9 draft default tip

Imported from capsule None
author steffen
date Fri, 30 Jan 2015 09:55:45 -0500
parents
children
line wrap: on
line source

#!/usr/bin/perl
use strict;
use File::Basename;
use List::MoreUtils qw( minmax );

# --------------------------------------------------------------------------------------------------
# author:	steffen lott
# mail: 	steffen.lott@uni-freiburg.de
# date: 	06-10-2014
# version: 	1.6
# 
# description:
# 	The tool converts an output from MEGAN into a special network which can be visualized with
#	Cytoscape. CoVennTree produces two files: the first one contains the network and the second one
#	describes the attributes of the network.
# --------------------------------------------------------------------------------------------------

# print the version number and the usage information when the script is called without arguments
if (@ARGV == 0) {
	print "CoVennTree-Version 1.6\n";
	print "COMMAND\n";
	# six positional arguments are read by the script (argv0 - argv5)
	print "coventree argv0 argv1 argv2 argv3 argv4 argv5\n";
	print "--------------\n";
	print "argv0 = input file\n";
	# the color palettes are selected by the values 0 - 4
	print "argv1 = color mode [0,4]\n";
	# the transformation functions are selected by the values 0 - 6
	print "argv2 = transformation function [0,6]\n";
	print "argv3 = only leaf information => 0 ; all information => 1\n";
	print "argv4 = output file name network\n";
	print "argv5 = output file name attributes\n";
	exit;
}




# container to represent the network; every entry is one edge "parent pp child"
my @network = ();



# 0 PARAMETER_______________
# read argument from command-line
# important: DSV -> taxon-path, count(s) -> assigned -> tab
my $megan_file = $ARGV[0];


# 1 PARAMETER_______________
# color mode for venn-diagrams 0,1,2,3,4 (default: 3)
my $colorMode = defined $ARGV[1] ? $ARGV[1] : 3;


# 2 PARAMETER_______________
# transformation function which scales the frame size of the venn-diagrams (default: 1)
my $transFnc = defined $ARGV[2] ? $ARGV[2] : 1;


# 3 PARAMETER_______________
# the user can switch between "only leaf information" (0)
# or the complete tree information (1). the last one takes also the not assigned reads
# and creates artificial nodes to keep this number
# default: all information will be used! not assigned and assigned
my $onlyLeafs = "off";
if(defined $ARGV[3]){
	if($ARGV[3] == 0){
		$onlyLeafs = "on";
	}elsif($ARGV[3] == 1){
		$onlyLeafs = "off";
	}else{
		# any other value used to leave $onlyLeafs undefined, which silently acted like "on"
		die "Error: argv3 has to be 0 (only leaf information) or 1 (all information)!\n";
	}
}

# 4 PARAMETER_______________
# output -> network
my $out_network    = $ARGV[4];

# 5 PARAMETER_______________
# output -> attributes
my $out_attributes = $ARGV[5];



# check the input format of the file. only files with one to three datasets are accepted;
# missing datasets are filled up with zeros


# read-in MEGAN-file
# the number of datasets is taken from the header line " #Datasets		set1	set2	..."
# NOTE(review): a file without any header line is rejected below - confirm that MEGAN
# always writes the header line, also for a single dataset
open(my $inFile, '<', $megan_file) or die "File not found - \"Path-File\"! ($megan_file: $!)\n";
my @pairIds = ();
my $header  = "";
my @input_file   = ();
my @numberOfSets = ();

while(my $line = <$inFile>){
	chomp($line);
	# blank lines carry no taxon path and would end up as empty nodes
	next if $line =~ /^\s*$/;

	if($line =~ /^#/){
		$header = $line;
		# header columns = one label + one column per dataset
		@numberOfSets = split("\t", $line);
		next;
	}

	# check the number of datasets: no set at all or more than three sets are not supported.
	# the error goes to STDERR and the script ends with a non-zero exit status
	if(@numberOfSets < 2 || @numberOfSets > 4){
		close($inFile);
		die "Error: File doesn't contain any dataset or contain more than three!\n";
	}

	if(@numberOfSets == 2){			# only one set is in the file -> add 2x zeros
		$line .= "\t" . 0 . "\t" . 0;
	}elsif(@numberOfSets == 3){		# only two sets are in the file -> add 1x zeros
		$line .= "\t" . 0;
	}

	addToNetwork($line);
	push(@input_file, $line);
}
close($inFile);


# --------------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------------
# (1) PREPROCESSING: walk through the input lines, mark inner nodes (value "undef") and compute the
#     value string of every leaf; the result is a reference to the list of modified lines
my $modifiedInput  = detectNonLeafs();

# (2) MAIN COMPUTATION: process the tree depth by depth, from the deepest level up to the root;
#     returns the venn container and the container with the special values
my ($vennClusterOut, $specialNumberOut) = clusterVennBottomUp();

# (3) VENN-END-PREPARATION: build one attribute line per node (node id, chart URL, label, values)
my $vennToStore    = vennForCytoscape($vennClusterOut, $specialNumberOut);

# (4) SAVE RESULTS INTO FILES: one file contains the network (.sif), the other one contains the attributes
storeNetwork();
store2FileVenn($vennToStore);
# --------------------------------------------------------------------------------------------------------------
# --------------------------------------------------------------------------------------------------------------




# Writes the attribute lines of the venn-diagrams into the attributes file.
#   $_[0] : reference to an array of attribute lines (one line per node)
# The file name is taken from the file-level variable $out_attributes.
# Returns 1; dies if the file name is missing or the file can't be opened, written or closed.
sub store2FileVenn{
	my $outVenn = $_[0];
	my $tmpFileName = $out_attributes;

	# a three-argument open with an undefined name would silently open an anonymous
	# temporary file, so a missing file name has to be rejected explicitly
	if(!defined $tmpFileName || $tmpFileName eq ""){
		die "File can't be written - \"venn - File\"!\n";
	}

	# three-argument open with a lexical handle: the name is never interpreted as an open mode
	open(my $fh, '>', $tmpFileName) or die "File can't be written - \"venn - File\"!\n";
	print $fh join("\n", @{$outVenn}) . "\n"
		or die "File can't be written - \"venn - File\"!\n";
	# buffered write errors show up at close
	close($fh) or die "File can't be written - \"venn - File\"!\n";
	return 1;
}


# Builds the attribute lines for Cytoscape.
#   $_[0] : reference to the venn container, layout [depth]{parent}{child} => value string
#   $_[1] : reference to the container of the special values (same layout)
# Every line consists of four tab separated columns: node id, chart URL, label and values.
# The label is the node id; if a special value exists it is appended as "[x.xxx]".
# Returns a reference to the array of lines.
sub vennForCytoscape{
	my ($clusters, $specials) = @_;
	my @lines = ();

	for my $depth (0 .. $#{$clusters}){
		while ( my($parent, $children) = each %{$clusters->[$depth]} ){
			while ( my($node, $nodeValues) = each %{$clusters->[$depth]{$parent}} ){
				# frame size depends on the absolute counts of the node
				my $frame = getCorrectedFrameSize($nodeValues);

				my $special = $specials->[$depth]{$parent}{$node};
				my $label   = defined $special
					? $node . "[" . sprintf("%.3f", $special) . "]"
					: $node;

				my $chartUrl = computeGoogleApiStrRotation($frame, $nodeValues, $colorMode);
				push(@lines, join("\t", $node, $chartUrl, $label, $nodeValues));
			}
		}
	}
	return \@lines;
}


# Builds the Google chart URL of a venn-diagram. The three circles are emitted in descending
# order of their dataset counts, so the biggest dataset always takes the first position.
#   $_[0] : frame size in pixel (used as width and height)
#   $_[1] : value string "d1 d2 d3 ovp12 ovp13 ovp23 ovp123" (space separated)
#   $_[2] : color mode 0 - 4; every other value falls back to mode 3 (the default of the script)
# Returns the URL as string.
sub computeGoogleApiStrRotation{
	my $frameSize = $_[0];
	my $values    = $_[1];
	my $colMode   = $_[2];
	my @spVal     = split(" ", $values);
	my $sum       = $spVal[0] + $spVal[1] + $spVal[2];

	# user color-mode: one color per dataset
	my @palettes = (
		[ "18A3F2", "FA0800", "FFF905" ],
		[ "FF2A00", "9CFF00", "00CCFF" ],
		[ "B4FF00", "FF00C6", "00AEFF" ],
		[ "82FF00", "7E00FF", "FF003B" ],
		[ "1A1A1A", "8A8A8A", "C7C7C7" ],
	);
	my ($paletteIdx) = grep { $colMode == $_ } 0 .. $#palettes;
	# an unknown mode used to produce an empty color list in the URL
	$paletteIdx = 3 unless defined $paletteIdx;
	my @col = @{ $palettes[$paletteIdx] };

	# relative values in percent of the summed dataset counts (all zero for an empty node)
	my @relVal = map { $sum != 0 ? $_ * 100 / $sum : 0 } @spVal;

	# pairwise overlaps, addressable by both orders of the dataset indices
	my %ovHash = (
		"01" => $relVal[3], "10" => $relVal[3],
		"02" => $relVal[4], "20" => $relVal[4],
		"21" => $relVal[5], "12" => $relVal[5],
	);

	# dataset indices sorted by count (descending). ties are broken by the index, otherwise
	# the order would depend on the randomized key order of a hash and differ between runs
	my @store = sort { $spVal[$b] <=> $spVal[$a] || $a <=> $b } 0 .. 2;

	my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize . "&chco=";
	# color positions follow the sorted datasets
	$url .= join(",", map { $col[$_] } @store);

	$url .= "&cht=v&chd=t:";
	# three circle sizes followed by the three pairwise overlaps, all in the sorted order;
	# always three circle values are written, missing values count as 0.0
	my @chartData = (
		(map { $relVal[$_] } @store),
		$ovHash{ $store[0] . $store[1] },
		$ovHash{ $store[0] . $store[2] },
		$ovHash{ $store[1] . $store[2] },
	);
	$url .= join(",", map { sprintf("%.1f", $_) } @chartData) . ",";
	# the overlap of all three datasets is always written as 0.0
	$url .= "0.0";
	$url .= "&chf=bg,s,e0dede00";
	return $url;
}


# Original function without any node rotation: the order of the datasets is always the same
# and the colors are fixed.
#   $_[0] : frame size in pixel (used as width and height)
#   $_[1] : value string (space separated); the first three values are the dataset counts
# Every value is written in percent of the summed dataset counts.
# Returns the URL as string.
sub computeGoogleApiStr{
	my $frameSize = $_[0];
	my $values    = $_[1];
	my @spVal     = split(" ", $values);
	my $sum       = $spVal[0] + $spVal[1] + $spVal[2];

	# a node without any count used to end in a division by zero
	my @relVal = map { $sum != 0 ? $_ * 100 / $sum : 0 } @spVal;

	my $url = "http://chart.apis.google.com/chart?chs=" . $frameSize . "x" . $frameSize
	        . "&chco=FF6342,ADDE63,63C6DE"      # color
	        . "&cht=v&chd=t:";

	# an empty value string is written as a single 0.0
	$url .= @relVal ? join(",", map { sprintf("%.1f", $_) } @relVal) : "0.0";

	$url .= "&chf=bg,s,e0dede00";
	return $url;
}


# Computes the frame size of a venn-diagram.
#   $_[0] : value string "d1 d2 d3 ovp12 ovp13 ovp23 ..." (space separated)
# The base frame is derived from the sum of the three dataset counts. The part of the two
# smaller datasets which doesn't overlap with the largest one gets its own frame, which is
# added on top of the base frame.
# Returns the summed frame size.
sub getCorrectedFrameSize{
	my ($values) = @_;
	my @counts    = split(" ", $values);
	my $baseFrame = lookupPixelSQRT($counts[0] + $counts[1] + $counts[2]);

	# for every possible position of the largest dataset: the indices of the two
	# remaining datasets and of their overlaps with the largest one
	my @layout = (
		[ 1, 2, 3, 4 ],
		[ 0, 2, 3, 5 ],
		[ 0, 1, 4, 5 ],
	);
	my $maxPos = getMaxPos($counts[0], $counts[1], $counts[2]);
	my ($nodeA, $nodeB, $ovpA, $ovpB) = @{ $layout[$maxPos] };

	my $outside = ($counts[$nodeA] + $counts[$nodeB]) - ($counts[$ovpA] + $counts[$ovpB]);
	return $baseFrame + lookupPixelSQRT($outside);
}


# Returns the position (0, 1 or 2) of the largest of the three given values.
# If several values share the maximum, the lowest position wins.
sub getMaxPos{
	my ($first, $second, $third) = @_;

	return 0 if $first  >= $second && $first  >= $third;
	return 1 if $second >= $first  && $second >= $third;
	return 2;
}


# Old frame size computation (static lookup table).
#   $_[0] : value string (space separated); the first three values are the dataset counts
# Returns the frame size which lookupPixel() assigns to the sum of the three counts.
sub getFrameSize{
	my ($d1, $d2, $d3) = split(" ", $_[0]);
	return lookupPixel($d1 + $d2 + $d3);
}



# Sums up the value strings of the tree from the deepest level up to the root.
# Works on the file-level $modifiedInput (lines "path<tab>value string").
# Returns two array references:
#   1. the venn container        [deep]{parent}{child} => value string
#   2. the special value container [deep]{parent}{child} => result of vennCongruousness()
#      (only filled for nodes which received summed up values from their children)
sub clusterVennBottomUp{
	# transform $modifiedInput into datastructure
	# container => [deep]{parent}{child}
	my @container        = ();
	my @containerSpecial = ();
	# NOTE(review): @nodeValues is filled below but never read, $maxDeep is never used
	my @nodeValues = ();
	my $maxDeep    = 0; 
	# node id => summed up value string of all children processed so far
	# NOTE(review): keyed by the node id only - presumably the ids are unique in the whole
	# tree; equal ids in different branches would share one entry. TODO confirm
	my %helperHash = ();
	# node id => list of the value strings of its children (input of vennCongruousness)
	my %specialMatrixAll = ();
	
	foreach(@{$modifiedInput}){
		my @tmpArr = split('\t', $_);
		my @path   = split(';' , $tmpArr[0]);
		# deep = number of path elements - 1 (a path with one element has deep 0)
		my $deep   = @path - 1;
		
		if(($deep - 1) >= 0){
			$container[$deep]{$path[-2]}{$path[-1]}  = $tmpArr[1];
			$nodeValues[$deep]{$path[-2]}{$path[-1]} = "f";
		}else{
			# a node without predecessor is stored below the artificial parent "no"
			$container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
		}
	}
	# start computation from the deepest path to the root node
	for(my $i = (@container-1) ; $i >= 0  ; $i--){
		while ( my($key, $value) = each %{$container[$i]} ){
			# update all predecessor nodes: a child which already has an entry in %helperHash
			# gets the summed up values of its own children instead of its original value
			while ( my($keyUp, $valueUp) = each %helperHash ){
				if(exists $container[$i]{$key}{$keyUp}){
					$container[$i]{$key}{$keyUp} = $valueUp;
					# compute special value by decompose venn's and add special value
					$containerSpecial[$i]{$key}{$keyUp} = vennCongruousness(\@{$specialMatrixAll{$keyUp}});
				}
			}
			# group all nodes which has the same predecessor id and sum up the values.
			# this runs after the update above, so the values of inner nodes are already summed up
			while ( my($key2, $value2) = each %{$container[$i]{$key}} ){
				if(exists $helperHash{$key}){
					$helperHash{$key} = addValues($helperHash{$key}, $value2);
				}else{
					$helperHash{$key} = $value2;
				}
				# every child value is kept for the special value of the parent
				push(@{$specialMatrixAll{$key}}, $value2);
			}			
		}
	}
	return \@container, \@containerSpecial;
}


# Condenses the value strings of all children of a node into one single value.
#   $_[0] : reference to an array of value strings "d1 d2 d3 ovp12 ovp13 ovp23 ..."
# The number of datasets is derived from the file-level @numberOfSets (header columns - 1).
# Returns
#   - the value of the first dataset column if the file contains one dataset,
#   - the mean of the dataset score and the overlap score for two or three datasets
#     (only the dataset score if no overlap column contains a count),
#   - -1 for every other number of datasets.
# No debug output is written to STDOUT anymore.
sub vennCongruousness{
	my $inSpecMatrix = $_[0];
	my $numOfSets    = @numberOfSets - 1;

	my @rows    = map { [ split(" ", $_) ] } @{$inSpecMatrix};
	my $numVenn = @rows;

	# (step 1) - sum up the columns and remember which columns contain a count at all
	my @colSum = ();
	my @active = (0) x 6;
	foreach my $row (@rows){
		for my $j (0 .. 5){
			$colSum[$j] += $row->[$j];
			$active[$j]  = 1 if $row->[$j] > 0;
		}
	}
	my $activeSets = $active[0] + $active[1] + $active[2];   # datasets d1 - d3
	my $activeOvps = $active[3] + $active[4] + $active[5];   # pairwise overlaps

	# (step 2 + 3) - ratio column sum / value for every row, summed up per column.
	# a value of zero contributes zero (no division by zero)
	my @score = ();
	foreach my $row (@rows){
		for my $j (0 .. 5){
			$score[$j] += ($row->[$j] == 0) ? 0 : $colSum[$j] / $row->[$j];
		}
	}

	# (step 4 + 5) - mean ratio per column, afterwards normalized between zero and one
	# -> min(mean, #{V}) / max(mean, #{V})
	for my $j (0 .. $#score){
		my $ratio = $score[$j];
		my $min   = ($numVenn > $ratio) ? $ratio : $numVenn;
		$ratio    = ($min == 0) ? 0 : $ratio / $numVenn;

		my $max = ($numVenn < $ratio) ? $ratio : $numVenn;
		$min    = ($numVenn > $ratio) ? $ratio : $numVenn;
		$score[$j] = ($max == 0) ? 0 : $min / $max;
	}

	# (step 6) - combine all decomposed values and create only one value
	return $score[0] if $numOfSets == 1;
	return -1 unless $numOfSets == 2 || $numOfSets == 3;

	my $setTotal = $score[0] + $score[1];
	$setTotal   += $score[2] if $numOfSets == 3;
	# mean over the datasets which occur at all; without any count the score is zero
	# (this case used to end in a division by zero)
	my $setScore = $activeSets ? $setTotal / $activeSets : 0;
	return $setScore if $activeOvps == 0;

	# two datasets have only one overlap column, three datasets use the mean of the
	# overlap columns which occur at all
	my $ovpScore = $score[3];
	if($numOfSets == 3){
		$ovpScore += $score[4];
		$ovpScore += $score[5];
		$ovpScore  = $ovpScore / $activeOvps;
	}
	return ($setScore + $ovpScore) / 2;
}


# save version of function clusterVennBottomUp()
#sub clusterVennBottomUp{
#	# transform $modifiedInput into datastructure
#	# container => [deep]{parent}{child}
#	my @container  = ();
#	my $maxDeep    = 0; 
#	my %helperHash = ();
#	
#	foreach(@{$modifiedInput}){
#		my @tmpArr = split('\t', $_);
#		my @path   = split(';' , $tmpArr[0]);
#		my $deep   = @path - 1;
#		
#		if(($deep - 1) >= 0){
#			$container[$deep]{$path[-2]}{$path[-1]} = $tmpArr[1];
#		}else{
#			$container[$deep]{"no"}{$path[-1]} = $tmpArr[1];
#		}
#	}
#	# start computation from the deepest path to the root node	
#	for(my $i = (@container-1) ; $i >= 0  ; $i--){
#		while ( my($key, $value) = each %{$container[$i]} ){
#			# update all predecessor nodes
#			while ( my($keyUp, $valueUp) = each %helperHash ){
#				if(exists $container[$i]{$key}{$keyUp}){
#					$container[$i]{$key}{$keyUp} = $valueUp;
#				}
#			}
#			# group all nodes which has the same predecessor id and sum up the values	
#			while ( my($key2, $value2) = each %{$container[$i]{$key}} ){		
#				if(exists $helperHash{$key}){
#					$helperHash{$key} = addValues($helperHash{$key}, $value2);
#				}else{
#					$helperHash{$key} = $value2;
#				}
#			}
#		}
#	}
#	return \@container;
#}



# Adds two value strings column by column.
#   $_[0], $_[1] : space separated value strings
# The first string dictates the number of columns; at least one column is always written.
# Returns the sums as space separated string.
sub addValues{
	my @left  = split(" ", $_[0]);
	my @right = split(" ", $_[1]);

	my $lastIdx = @left > 1 ? $#left : 0;
	return join(" ", map { $left[$_] + $right[$_] } 0 .. $lastIdx);
}



# Detects the non leaf nodes and replaces their values by the placeholder "undef".
# Works on the file-level @input_file, which is processed from the last line to the first one.
# Every output line has the form "converted path<tab>value string"; leafs get the value
# string of computeLeafValues(). If $onlyLeafs is "off", an inner node with own counts gets an
# artificial child "not_assigned_<node>" which keeps these counts (the edge is added to @network).
# Returns a reference to the list of output lines.
# NOTE(review): the original author states that this version works only with 3 depths.
# NOTE(review): a node is classified by comparing it with the line processed before, so the
# function presumably relies on children following their parent directly in the file - TODO confirm
sub detectNonLeafs{
	# NOTE(review): %recursiveValues, $convertedPath and @additionalNetwork are never used
	my %recursiveValues = ();
	my @modifiedFile    = ();
	my $convertedPath   = "";
	
	my @additionalNetwork = ();
	
	# read last line; the limit 2 splits off the path and keeps all counts in one string
	my @tmpArr1 = split('\t',$input_file[($#input_file)],2);
	# the path is addressed from the end instead of from the beginning (-1 leaf/child, -2 parent/inner node)
	my $parent1 = getId($tmpArr1[0],-2);
	my $child1  = getId($tmpArr1[0],-1); 
	my $deep1   = getPathDeep($tmpArr1[0]);
	my $parent2 = "";
	my $child2  = "";  
	my $deep2   = 0;
	
	
	# getId() returns -1 if the path has no parent element: then only the root node exists
	# NOTE(review): numeric comparison on a node id - a non numeric id counts as 0 here
	my $outStr = "";
	if($parent1 == -1){
		$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
		push(@modifiedFile, $outStr);
	}else{
		# the last line is always handled as a leaf
		$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
		push(@modifiedFile, $outStr);
	
		# *1 = line processed before (further down in the file), *2 = current line
		for(my $i = (@input_file-2) ; $i >= 0 ; $i--){
			@tmpArr1 = split('\t',$input_file[$i],2);
			$parent2 = getId($tmpArr1[0],-2);
			$child2  = getId($tmpArr1[0],-1); 
			$deep2   = getPathDeep($tmpArr1[0]);
			
			# same parent as the line before -> new leaf
			if($parent2 eq $parent1){
				$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
				push(@modifiedFile, $outStr);
			}elsif($parent1 eq $child2){
				# the current node is the parent of the line before -> inner node without own value
				$outStr = convertPath($tmpArr1[0]) . "\t" . "undef";
				push(@modifiedFile, $outStr);
				
				# sum of the counts which are assigned to the inner node itself
				my @check = split('\t', $tmpArr1[1]);
				my $tSum  = 0;
				foreach(@check){
					$tSum += $_;
				}
				if(($onlyLeafs eq "off") && ($tSum > 0)){
					# artificial leaf below the inner node; the new element is appended directly,
					# which assumes that the path ends with ";" - TODO confirm
					$outStr = convertPath($tmpArr1[0]) . "not_assigned_" . $child2 . ";" . "\t" . computeLeafValues($tmpArr1[1]);
					push(@modifiedFile, $outStr);
					$outStr = $child2 . " pp " . "not_assigned_" . $child2;
					push(@network, $outStr);
				}
			}else{
				# every other case is handled as a leaf
				$outStr = convertPath($tmpArr1[0]) . "\t" . computeLeafValues($tmpArr1[1]);
				push(@modifiedFile, $outStr);
			}			
			
			# the line before had no parent element: add the current path without values and stop
			if($parent1 == -1){
				push(@modifiedFile, convertPath($tmpArr1[0]));
				last;	
			}
			# $child1 and $deep1 are updated but never read
			$parent1 = $parent2;
			$child1  = $child2;
			$deep1   = $deep2;
		}
	}
	# the edges of the artificial nodes are stored in the file-level @network container,
	# which is written into the .sif file later
	
	return \@modifiedFile;
}


# helper function for detectNonLeafs
# Returns the depth of a path: the number of ";" separated elements minus one
# (-1 for an empty path).
sub getPathDeep{
	my @segments = split(';', $_[0]);
	return @segments - 1;
}

# Normalizes a path: removes all double quotes and replaces every run of
# whitespace by one underscore. Returns the normalized copy.
sub convertPath{
	my ($path) = @_;
	for ($path){
		s/"//g;
		s/\s+/_/g;
	}
	return $path;
}

# Returns one element of a ";" separated path.
#   $_[0] : path (double quotes are removed, whitespace runs become "_")
#   $_[1] : position of the element, e.g. -1 for the last and -2 for the last but one
# Returns -1 if a negative position reaches beyond the first element of the path.
sub getId{
	my ($lineToParse, $idPos) = @_;

	(my $normalized = $lineToParse) =~ s/"//g;
	$normalized =~ s/\s+/_/g;
	my @segments = split(';', $normalized);

	return -1 if (@segments + $idPos) < 0;
	return $segments[$idPos];
}

# Builds the value string of a leaf.
#   $_[0] : the counts of the three datasets, tab separated
# Returns "d1 d2 d3 ovp12 ovp13 ovp23 0": the three counts, followed by the pairwise
# overlaps (the smaller count of each pair) and a constant 0 as last value.
sub computeLeafValues{
	my @raw = split('\t', $_[0]);

	# on equal counts the first one is taken
	my $smaller = sub { return $_[0] <= $_[1] ? $_[0] : $_[1] };

	return join(" ",
		$raw[0], $raw[1], $raw[2],
		$smaller->($raw[0], $raw[1]),
		$smaller->($raw[0], $raw[2]),
		$smaller->($raw[1], $raw[2]),
		0,
	);
}
# -----------------------------------------------------------------------------


# compute network (.sif)
# Takes one input line (path and counts, tab separated) and adds the edge
# "parent pp child" of the last two path elements to the file-level @network.
# A path with only one element adds nothing.
sub addToNetwork{
	my ($path) = split('\t', $_[0]);

	# remove ' " ' from the path and replace whitespace by "_"
	for ($path){
		s/"//g;
		s/\s+/_/g;
	}
	my @nodes = split(';', $path);

	if(@nodes > 1){
		push(@network, join(" pp ", $nodes[-2], $nodes[-1]));
	}
}


# store network in .sif file
# Writes all edges of the file-level @network (one edge per line) into the file which is
# named by the file-level variable $out_network.
# Returns 1; dies if the file name is missing or the file can't be opened, written or closed.
sub storeNetwork{
	my $tmpFileName = $out_network;

	# a three-argument open with an undefined name would silently open an anonymous
	# temporary file, so a missing file name has to be rejected explicitly
	if(!defined $tmpFileName || $tmpFileName eq ""){
		die "File can't be written - \"sif - File\"!\n";
	}

	# three-argument open with a lexical handle: the name is never interpreted as an open mode
	open(my $fh, '>', $tmpFileName) or die "File can't be written - \"sif - File\"!\n";
	print $fh join("\n", @network) . "\n"
		or die "File can't be written - \"sif - File\"!\n";
	# buffered write errors show up at close
	close($fh) or die "File can't be written - \"sif - File\"!\n";
	return 1;
}


# ---------------------------------------------------------------------------------------------
# two different lookup-tables are available!
# lookupPixel() => static ; lookupPixelSQRT() => dynamic
#
# lookup absolute node-size to pixel (frame-size for venn-diagram), static table
#   $_[0] : absolute node size
# Returns the pixel value of the first step whose limit is above the node size,
# 250 for everything from 30,000,000 upwards.
sub lookupPixel{
	my ($query) = @_;

	# [ exclusive upper limit, pixel ]
	my @steps = (
		[       10,  30 ],
		[      100,  40 ],
		[     1000,  50 ],
		[    10000,  60 ],
		[   100000,  80 ],
		[  1000000, 100 ],
		[ 10000000, 140 ],
		[ 20000000, 180 ],
		[ 30000000, 220 ],
	);
	foreach my $step (@steps){
		my ($limit, $pixel) = @{$step};
		return $pixel if $query < $limit;
	}
	return 250;
}

# lookup absolute node-size to pixel (frame-size for venn-diagram) <- this is currently used!
#   $_[0] : absolute node size
# The node size is scaled by a root function; the degree of the root is selected by the
# file-level $transFnc (0 - 6, a higher mode fits a larger number of datapoints).
# A mode outside of 0 - 6 falls back to mode 1, the default of the script; before, such a
# mode ended in a frame size of zero. A negative node size is handled like zero.
# Returns the frame size in pixel as integer.
sub lookupPixelSQRT{
	my $nodeSize = $_[0];

	# degree of the root per transformation mode
	my @rootDegree = (
		1.6,    # 3,000 datapoints in sum
		2.1,    # 30,000 datapoints in sum
		2.6,    # 300,000 datapoints in sum
		3.1,    # 3,000,000 datapoints in sum
		3.7,    # 30,000,000 datapoints in sum
		4,      # 300,000,000 datapoints in sum
		4.7,    # 3,000,000,000 datapoints in sum
	);
	my ($mode) = grep { $transFnc == $_ } 0 .. $#rootDegree;
	$mode = 1 unless defined $mode;

	# the root of a negative number is not a number
	$nodeSize = 0 if $nodeSize < 0;

	return int(($nodeSize ** (1/$rootDegree[$mode])) * 1.8 + 8);
}