Mercurial > repos > melpetera > acorf

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/Analytic_correlation_filtration.pl	Fri Oct 18 04:59:51 2019 -0400
@@ -0,0 +1,643 @@
+#!/usr/bin/perl
+
+### Perl modules
+use warnings;
+use strict;
+use Getopt::Long qw(GetOptions); # Parsing of the command-line options
+use Pod::Usage qw(pod2usage); # Error/usage messages for missing options
+
+# Personal packages
+use FindBin ; ## Locates the directory of the running script so that ./lib can be found
+#use lib $FindBin::Bin;
+use lib "$FindBin::Bin/lib";
+use IonFiltration;
+
+# Command-line options, filled in by GetOptions below:
+#   -f              correlation (similarity) table
+#   -m              mass differences list (only read when -o is not 1)
+#   -o              grouping method (1, 2 or 3, see the usage message below)
+#   -d              dataMatrix file
+#   -v              variableMetadata file
+#   -r              method used to choose the representative ion of a group
+#   -rt / -mass     retention time threshold / mass difference range
+#   -correl         correlation threshold
+#   -IT / -IP       intensity threshold / intensity ratio ("max_intensity_max_mass" only)
+#   -output_sif / -output_tabular   output files
+my ($file, $mass_file, $opt, $dataMatrix, $combined_DMVM, $repres_opt,
+	$rt_threshold, $mass_threshold, $output_sif, $output_tabular,
+	$correl_threshold, $intensity_threshold, $intensity_pourc);
+
+########################
+### Options and help ###
+########################
+
+GetOptions(
+	"f=s"=>\$file,
+	"m=s"=>\$mass_file,
+	"o=s"=>\$opt,
+	"d=s"=>\$dataMatrix,
+	"v=s"=>\$combined_DMVM,
+	"r=s"=>\$repres_opt,
+	"rt=f"=>\$rt_threshold,
+	"mass=f"=>\$mass_threshold,
+	"output_sif=s"=>\$output_sif,
+	"output_tabular=s"=>\$output_tabular,
+	"correl=s"=>\$correl_threshold,
+	"IT=f"=>\$intensity_threshold,
+	"IP=f"=>\$intensity_pourc,
+) or pod2usage(2);
+
+### Check required parameters (-m, -rt and -mass are optional: they depend on the grouping method) :
+pod2usage({-message=>q{Mandatory argument '-f' is missing}, -exitval=>1, -verbose=>0}) unless $file;
+#pod2usage({-message=>q{Mandatory argument '-m' is missing}, -exitval=>1, -verbose=>0}) unless $mass_file;
+pod2usage({-message=>q{Mandatory argument '-o' is missing. It corresponds to the grouping method for analytical correlation groups formation.
+It should be a number (1 ; 2 or 3) :
+	1 : Don't take into account mass information (only RT) ;
+	2 : Check that all mass differences are included in a specific list and take into account RT information
+	3 : Check that all mass differences are included in a specific list, ignoring RT information
+To use the tool without taking into account mass and RT information, use option 1 and define the RT threshold to 999999999.}, -exitval=>1, -verbose=>0}) unless $opt;
+pod2usage({-message=>q{Mandatory argument '-r' is missing. It corresponds to the method used to choose the representative ion of each analytical correlation group.
+It should be one of the 4 options below :
+	"mass" : choose the ion with the highest mass as the representative
+	"intensity" : choose the ion with the highest intensity as the representative
+	"mixt" : choose the ion with the highest (mass^2 * intensity) as the representative
+	"max_intensity_max_mass" : choose the ion with the highest mass among the 3 most intense ions of the group}, -exitval=>1, -verbose=>0}) unless $repres_opt;
+pod2usage({-message=>q{Mandatory argument '-d' is missing}, -exitval=>1, -verbose=>0}) unless $dataMatrix;
+pod2usage({-message=>q{Mandatory argument '-v' is missing}, -exitval=>1, -verbose=>0}) unless $combined_DMVM;
+#pod2usage({-message=>q{Mandatory argument '-rt' is missing}, -exitval=>1, -verbose=>0}) unless $rt_threshold;
+#pod2usage({-message=>q{Mandatory argument '-mass' is missing}, -exitval=>1, -verbose=>0}) unless $mass_threshold;
+pod2usage({-message=>q{Mandatory argument '-correl' is missing}, -exitval=>1, -verbose=>0}) unless $correl_threshold;
+pod2usage({-message=>q{Mandatory argument '-output_tabular' is missing}, -exitval=>1, -verbose=>0}) unless $output_tabular;
+pod2usage({-message=>q{Mandatory argument '-output_sif' is missing}, -exitval=>1, -verbose=>0}) unless $output_sif;
+
+
+# Disabled check of the -o value, kept for reference:
+#if(($opt != 1) && ($opt != 2) && ($opt != 3)){
+#	print "you must indicate \"1\", \"2\" or \"3\" for the -o option\n";
+#	exit;
+#}
+
+
+
+# Reject an unknown representative-selection method. The message goes to
+# STDERR and the exit status is non-zero, so the caller sees the failure.
+if(($repres_opt ne "mass") && ($repres_opt ne "intensity") && ($repres_opt ne "mixt") && ($repres_opt ne "max_intensity_max_mass")){
+	print STDERR "you must indicate \"mass\", \"intensity\", \"mixt\" or \"max_intensity_max_mass\" for the -r option\n";
+	exit 1;
+}
+
+
+
+#######################################################################
+### Creation of a hash containing all possible adducts / fragments  ###
+#######################################################################
+
+# The mass differences list is only loaded when the grouping method (-o)
+# is not 1; with method 1 the hash stays empty.
+my %hmass = ($opt != 1) ? IonFiltration::MassCollecting($mass_file) : ();
+
+# Reference to the same hash, used by the mass-matching loops further down
+my $refhmass = \%hmass;
+
+print "Création of a hash containing all adduits and fragments possible\n";
+
+
+########################################################
+### Creation of a sif table + correlation filtration ###
+########################################################
+
+# %hrtmz is read later in this script as ion id => { mz => ..., rt => ... }.
+# It is filled by IonFiltration::sifTableCreation, which is defined in the
+# IonFiltration module (not visible here); the call also hands back the
+# value stored in $output_sif, which is opened as the sif file afterwards.
+my %hrtmz;
+($output_sif, %hrtmz) = IonFiltration::sifTableCreation($file, $output_sif, $opt, $rt_threshold, $mass_threshold, $correl_threshold, $dataMatrix, $output_tabular, $combined_DMVM, $repres_opt, $intensity_threshold, $intensity_pourc, \%hmass);
+print "Creation of a sif table + correlation filtration done\n";
+
+
+#############################################################
+### Analytic correlation filtering following the -o option ###
+#############################################################
+
+# Groups of analytically correlated ions:
+#   group name ("group<N>") => { ion id => 1 }
+my %hcorrelgroup;
+my $groupct=1; # number given to the next group created
+
+# Which criteria a pair of ions has to satisfy to be grouped:
+#   -o 1 : retention time only
+#   -o 2 : mass difference found in the list AND retention time
+#   -o 3 : mass difference found in the list only
+my $check_rt   = (($opt == 1) || ($opt == 2));
+my $check_mass = (($opt == 2) || ($opt == 3));
+
+open (my $sif_fh, '<', $output_sif) or die "Impossible to open $output_sif: $!\n";
+
+PAIR: while(my $line = <$sif_fh>){
+	chomp $line;
+	my @tline = split(/\t/, $line);
+
+	# Columns 0 and 2 of the sif file hold the ids of the two ions of the pair
+	my $ion1 = $tline[0];
+	my $ion2 = $tline[2];
+
+	my $mass1 = $hrtmz{$ion1}{mz};
+	my $rt1   = $hrtmz{$ion1}{rt};
+	my $mass2 = $hrtmz{$ion2}{mz};
+	my $rt2   = $hrtmz{$ion2}{rt};
+	my $diff  = abs($mass1 - $mass2);
+
+	# Any other -o value groups nothing
+	next PAIR unless ($check_rt || $check_mass);
+
+	# Mass criterion: the mass difference of the pair must match one entry
+	# of the list at +/- $mass_threshold. One match is enough, so the scan
+	# stops at the first one.
+	if($check_mass){
+		my $mass_match = 0;
+		MASS: foreach my $s (keys %{$refhmass}){
+			foreach my $r (keys %{$refhmass->{$s}}){
+				my $rm = $r - $mass_threshold;
+				my $rp = $r + $mass_threshold;
+				if(($diff <= $rp) && ($diff >= $rm)){
+					$mass_match = 1;
+					last MASS;
+				}
+			}
+		}
+		next PAIR unless $mass_match;
+	}
+
+	# Retention time criterion: both retention times within +/- $rt_threshold
+	if($check_rt){
+		my $rtplus  = $rt2 + $rt_threshold;
+		my $rtminus = $rt2 - $rt_threshold;
+		next PAIR unless (($rtminus <= $rt1) && ($rt1 <= $rtplus));
+	}
+
+	# Put the pair in the first existing group that already holds one of the
+	# two ions, otherwise open a new group. Groups that end up sharing an
+	# ion are joined in the next step of the script.
+	my $joined = 0;
+	foreach my $k (keys %hcorrelgroup){
+		if((defined($hcorrelgroup{$k}{$ion1})) || (defined($hcorrelgroup{$k}{$ion2}))){
+			$hcorrelgroup{$k}{$ion1}=1;
+			$hcorrelgroup{$k}{$ion2}=1;
+			$joined = 1;
+			last;
+		}
+	}
+	if(!$joined){
+		my $groupnb="group".$groupct;
+		$hcorrelgroup{$groupnb}{$ion1}=1;
+		$hcorrelgroup{$groupnb}{$ion2}=1;
+		$groupct ++;
+	}
+}
+close $sif_fh;
+
+print "Analytic correlation filtrering follow options done\n";
+
+
+#############################################
+### Join groups that have been subdivided ###
+#############################################
+
+# Two groups sharing at least one ion are the same analytical correlation
+# group: the second one is poured into the first one and removed. A merge
+# can make the enlarged group overlap with a group that was already
+# examined, so the whole scan is repeated until a pass merges nothing
+# (a fixed number of passes does not guarantee that).
+my $merged = 1;
+while($merged){
+	$merged = 0;
+	foreach my $k (sort keys %hcorrelgroup){
+		# $k may have been absorbed by another group earlier in this pass
+		next unless exists($hcorrelgroup{$k});
+		foreach my $v (sort keys %hcorrelgroup){
+			next if $v eq $k;
+			next unless exists($hcorrelgroup{$v});
+
+			# Number of ions of $k that are also members of $v
+			my $shared = grep { exists($hcorrelgroup{$v}{$_}) } keys %{$hcorrelgroup{$k}};
+			next unless $shared;
+
+			foreach my $w (keys %{$hcorrelgroup{$v}}){
+				$hcorrelgroup{$k}{$w}=$hcorrelgroup{$v}{$w};
+			}
+			delete($hcorrelgroup{$v});
+			$merged = 1;
+		}
+	}
+}
+
+print "Join groups that have been subdivided done\n";
+
+#######################################################
+### Addition of annotation information among groups ###
+#######################################################
+
+# For every ion of a group, compare its mass with the mass of every other
+# ion of the same group. Each mass difference found in the list (at
+# +/- $mass_threshold) adds "sign(annotation)(delta)|" to the value stored
+# for the ion; the first annotation found for a given partner is preceded
+# by "@partner_id|". The initial value 1 is replaced by the first annotation.
+foreach my $group (keys %hcorrelgroup){
+	my $members = $hcorrelgroup{$group};
+	foreach my $ion (keys %{$members}){
+		foreach my $partner (keys %{$members}){
+			next if $ion eq $partner;
+
+			my $delta = $hrtmz{$ion}{mz} - $hrtmz{$partner}{mz};
+			my $sign;
+			if($delta > 0){
+				$sign = "+";
+			}
+			elsif($delta < 0){
+				$sign = "-";
+			}
+			$delta = abs($delta);
+
+			# Number of annotations already written for this partner
+			my $written = 0;
+
+			foreach my $label (keys %{$refhmass}){
+				foreach my $ref_mass (keys %{$refhmass->{$label}}){
+					my $low  = $ref_mass - $mass_threshold;
+					my $high = $ref_mass + $mass_threshold;
+					next unless (($delta <= $high) && ($delta >= $low));
+
+					# Distance between the observed and the listed mass difference
+					my $error = sprintf ("%0.6f", abs($delta - $ref_mass));
+					my $annotation = $sign."(".$label.")(".$error.")|";
+
+					if($members->{$ion} eq 1){
+						$members->{$ion} = "@".$partner."|".$annotation;
+					}
+					elsif($written == 0){
+						$members->{$ion} .= "@".$partner."|".$annotation;
+					}
+					else{
+						$members->{$ion} .= $annotation;
+					}
+					$written ++;
+				}
+			}
+		}
+	}
+}
+
+
+print "Addition of annotation information among groups done\n";
+
+
+####################################################
+### Choose the representative ion for each group ###
+####################################################
+
+# %hgrouprepres{group}{ion id} = { mass, intensity (mean over the samples),
+# squaredmassint (mass^2 * mean intensity) }. Once a group is processed, the
+# keys max_int, max_mass, max_squared and max_int_max_mass of
+# %hgrouprepres{group} hold the id of the ion chosen by each method.
+my %hgrouprepres;
+
+# Reverse index (ion id => groups holding it) so that each dataMatrix row is
+# matched with one hash lookup instead of a scan of every group member.
+my %groups_of_ion;
+foreach my $k (keys %hcorrelgroup){
+	foreach my $i (keys %{$hcorrelgroup{$k}}){
+		push(@{$groups_of_ion{$i}}, $k);
+	}
+}
+
+open(my $dm_fh, '<', $dataMatrix) or die "Impossible to open $dataMatrix: $!\n";
+
+while (my $line = <$dm_fh>){
+	chomp $line;
+
+	my @tline = split (/\t/, $line);
+
+	# Rows whose first column is not a grouped ion (header included) are skipped
+	next unless (defined($tline[0]) && exists($groups_of_ion{$tline[0]}));
+
+	my $ion = $tline[0];
+	my $mass = $hrtmz{$ion}{mz};
+
+	# Mean of all the columns following the ion id
+	my $intensity = 0;
+	my $nbsubjects = scalar(@tline) - 1;
+	foreach my $y (1 .. $#tline){
+		$intensity += $tline[$y];
+	}
+	# A row without any sample column gets a mean of 0 instead of a division by zero
+	my $meanintensity = ($nbsubjects > 0) ? $intensity/$nbsubjects : 0;
+
+	foreach my $k (@{$groups_of_ion{$ion}}){
+		$hgrouprepres{$k}{$ion}{mass}=$mass;
+		$hgrouprepres{$k}{$ion}{intensity}=$meanintensity;
+		# "mixt" criterion: mass^2 * intensity, as stated in the usage message
+		# of the -r option (a division here would favour the weakest ions and
+		# fail on a null intensity)
+		$hgrouprepres{$k}{$ion}{squaredmassint}=($mass**2)*$meanintensity;
+	}
+}
+close $dm_fh;
+
+foreach my $z (keys %hgrouprepres){
+	my $max_intensity =  0;
+	my $max_int_ion = "";
+	my $max_mass = 0;
+	my $max_mass_ion = "";
+	my $max_squared = 0;
+	my $max_squared_ion = "";
+	foreach my $w (keys %{$hgrouprepres{$z}}){
+		if($hgrouprepres{$z}{$w}{intensity} > $max_intensity){
+			$max_intensity = $hgrouprepres{$z}{$w}{intensity};
+			$max_int_ion = $w;
+		}
+		if($hgrouprepres{$z}{$w}{mass} > $max_mass){
+			$max_mass = $hgrouprepres{$z}{$w}{mass};
+			$max_mass_ion = $w;
+		}
+		if($hgrouprepres{$z}{$w}{squaredmassint} > $max_squared){
+			$max_squared = $hgrouprepres{$z}{$w}{squaredmassint};
+			$max_squared_ion = $w;
+		}
+	}
+
+	my $max_int_max_mass_ion="";
+
+	if($repres_opt eq "max_intensity_max_mass"){
+		# The 3 most intense ions of the group, the most intense first
+		my @by_intensity = reverse sort {$hgrouprepres{$z}{$a}{intensity} <=> $hgrouprepres{$z}{$b}{intensity} } keys %{$hgrouprepres{$z}};
+		my @top = (scalar(@by_intensity) > 3) ? @by_intensity[0 .. 2] : @by_intensity;
+
+		if(@top){
+			my $intens_max = $hgrouprepres{$z}{$top[0]}{intensity};
+
+			# When even the most intense ion is under the intensity threshold,
+			# the threshold is ignored FOR THIS GROUP ONLY: the value given by
+			# the user must still apply to the following groups.
+			my $group_threshold = defined($intensity_threshold) ? $intensity_threshold : 0;
+			if($group_threshold > $intens_max){
+				$group_threshold = 0;
+			}
+			# Minimal intensity, relative to the most intense ion, required to replace it
+			my $min_intensity = $intens_max * (defined($intensity_pourc) ? $intensity_pourc : 0);
+
+			# Start from the most intense ion; a heavier ion among the top 3
+			# replaces it only if it passes both intensity conditions.
+			$max_int_max_mass_ion = $top[0];
+			my $mass_max = $hgrouprepres{$z}{$top[0]}{mass};
+
+			foreach my $y (@top[1 .. $#top]){
+				next unless $hgrouprepres{$z}{$y}{mass} > $mass_max;
+				next unless $hgrouprepres{$z}{$y}{intensity} > $group_threshold;
+				next unless $hgrouprepres{$z}{$y}{intensity} > $min_intensity;
+				$max_int_max_mass_ion = $y;
+				$mass_max = $hgrouprepres{$z}{$y}{mass};
+			}
+		}
+	}
+
+	$hgrouprepres{$z}{max_int}=$max_int_ion;
+	$hgrouprepres{$z}{max_mass}=$max_mass_ion;
+	$hgrouprepres{$z}{max_squared}=$max_squared_ion;
+	$hgrouprepres{$z}{max_int_max_mass}=$max_int_max_mass_ion;
+
+}
+
+
+print "Choose the representative ion for each group done\n";
+
+#############################################################################
+### Addition of annotation information relative to the representative ion ###
+#############################################################################
+
+# %hreprescomparison{group}{ion id}{repres_diff}: "M" for the representative
+# ion itself, otherwise one "[M sign(annotation)]|" per entry of the mass
+# differences list matching the mass difference with the representative ion.
+my %hreprescomparison;
+
+my $representative="";
+
+if($opt != 1){
+	# Key of %hgrouprepres{group} holding the representative ion for each -r method
+	my %field_of_method = (
+		"mass" => "max_mass",
+		"intensity" => "max_int",
+		"mixt" => "max_squared",
+		"max_intensity_max_mass" => "max_int_max_mass",
+	);
+	my $field = $field_of_method{$repres_opt};
+
+	foreach my $group (keys %hcorrelgroup){
+		foreach my $ion (keys %{$hcorrelgroup{$group}}){
+
+			if(defined($field)){
+				$representative = $hgrouprepres{$group}{$field};
+			}
+
+			if($ion eq $representative){
+				$hreprescomparison{$group}{$ion}{repres_diff}="M";
+				next;
+			}
+
+			my $delta = $hrtmz{$ion}{mz} - $hrtmz{$representative}{mz};
+			my $sign;
+			if($delta > 0){
+				$sign = "+";
+			}
+			elsif($delta < 0){
+				$sign = "-";
+			}
+			$delta = abs($delta);
+
+			foreach my $label (keys %{$refhmass}){
+				foreach my $ref_mass (keys %{$refhmass->{$label}}){
+					my $low  = $ref_mass - $mass_threshold;
+					my $high = $ref_mass + $mass_threshold;
+					next unless (($delta <= $high) && ($delta >= $low));
+
+					my $valrep = "[M ".$sign."(".$label.")]|";
+					# An ion still holding its initial value 1 starts a new
+					# annotation; any other ion gets the annotation appended
+					if($hcorrelgroup{$group}{$ion} eq 1){
+						$hreprescomparison{$group}{$ion}{repres_diff}=$valrep;
+					}
+					else{
+						$hreprescomparison{$group}{$ion}{repres_diff}.=$valrep;
+					}
+				}
+			}
+		}
+	}
+}
+
+
+print "Addition of annotation information relative to the representative ion done\n";
+
+##############################
+### Print in result file ! ###
+##############################
+
+# The variableMetadata file is copied line by line with extra columns:
+#   -o 1     : ACorF_groups, ACorF_filter, <method>_repres
+#   -o 2 / 3 : ACorF_groups, pairwise annotations, ACorF_filter,
+#              <method>_repres, annotation relative to the representative
+# ACorF_filter is 1 for the representative ion of a group and 0 for the
+# other members; ions belonging to no group get their own group name and "-".
+
+# For each -r method: key of %hgrouprepres{group} holding the representative
+# ion, and name of the matching output column
+my %repres_field_of = (
+	"intensity" => "max_int",
+	"mass" => "max_mass",
+	"mixt" => "max_squared",
+	"max_intensity_max_mass" => "max_int_max_mass",
+);
+my %repres_column_of = (
+	"intensity" => "intensity_repres",
+	"mass" => "mass_repres",
+	"mixt" => "mass2intens_repres",
+	"max_intensity_max_mass" => "max_intensity_max_mass_repres",
+);
+my $repres_field = $repres_field_of{$repres_opt};
+my $with_annotation = ($opt != 1);
+
+# The input is opened first so that a missing input does not leave an empty output file
+open(my $meta_fh, '<', $combined_DMVM) or die "Impossible to open $combined_DMVM: $!\n";
+open(my $out_fh, '>', $output_tabular) or die "Impossible to open $output_tabular: $!\n";
+
+my $line_nb = 0;
+while (my $line = <$meta_fh>){
+	chomp $line;
+
+	my @tline = split (/\t/, $line);
+
+	if($line_nb == 0){
+		my @header = ($line, "ACorF_groups");
+		push(@header, 'isotopes_adducts_fragments_[@id|annotation(delta_annotation)]') if $with_annotation;
+		push(@header, "ACorF_filter", $repres_column_of{$repres_opt});
+		push(@header, "annotation_relative_to_representative") if $with_annotation;
+		print $out_fh join("\t", @header)."\n";
+	}
+
+	else{
+		my $find = 0;
+		foreach my $v (keys %hcorrelgroup){
+			next unless (exists($hgrouprepres{$v}) && defined($hgrouprepres{$v}{$tline[0]}));
+
+			my $repres_ion = $hgrouprepres{$v}{$repres_field};
+			my $is_repres = ($tline[0] eq $repres_ion) ? 1 : 0;
+
+			# Every column is joined with a tab, so the row always has the
+			# same number of columns as the header (with -o 1 the group name
+			# used to be glued to the filter flag)
+			my @row = ($line, $v);
+			if($with_annotation){
+				push(@row, defined($hcorrelgroup{$v}{$tline[0]}) ? $hcorrelgroup{$v}{$tline[0]} : "-");
+			}
+			push(@row, $is_repres, $repres_ion);
+			if($with_annotation){
+				my $repres_diff = $hreprescomparison{$v}{$tline[0]}{repres_diff};
+				push(@row, defined($repres_diff) ? $repres_diff : "-");
+			}
+			print $out_fh join("\t", @row)."\n";
+			$find = 1;
+		}
+		if($find == 0){
+			# Ion belonging to no group: it gets a group of its own
+			$groupct ++;
+			my $group = "group".$groupct;
+			if($with_annotation){
+				print $out_fh "$line\t$group\t-\t-\t-\t-\n";
+			}
+			else{
+				print $out_fh "$line\t$group\t-\t-\n";
+			}
+		}
+	}
+	$line_nb ++;
+}
+
+close $meta_fh;
+# Buffered write errors only show up when the handle is closed
+close $out_fh or die "Impossible to write $output_tabular: $!\n";
+
+print "Print in result file done\n";
+
+print "All steps done\n";
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/README.md	Fri Oct 18 04:59:51 2019 -0400
@@ -0,0 +1,45 @@
+Analytical Correlation Filtration
+=======
+
+Metadata
+-----------
+
+ * **@name**: ACorF
+ * **@version**: 2019-06-20
+ * **@authors**: <stephanie.monnerie@inra.fr>
+ * **@date creation**: 2018/11/17
+ * **@main usage**: Reduction of analytical redundancies in Metabolomics data
+
+
+Configuration
+-----------
+
+### Requirement:
+ * perl
+
+
+### Deploy:
+
+
+### Warnings:
+
+
+Services provided
+-----------
+
+
+
+Technical description
+-----------
+
+
+Notes
+-----------
+
+
+
+
+License (optional)
+-----------
+
+This code is published under CECILL 2.1.
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/analytic_correlation_filtration.xml	Fri Oct 18 04:59:51 2019 -0400
@@ -0,0 +1,211 @@
+<tool id="Analytic_correlation_filtration" name="Analytic correlation filtration" version="2019-06-20">
+	<description>
+		: Detect analytic correlation among data and remove them.
+	</description>
+
+
+	 <command><![CDATA[
+
+
+		## Exactly one call of the script is written; the branch taken selects the grouping method given with -o:
+		##   1 = retention time only (a huge -rt threshold when retention time is ignored too)
+		##   2 = mass differences list + retention time
+		##   3 = mass differences list only
+		#if str($mass_file.mass_choice)=="false":
+			#if str($rt_cond.rt_choice)=="false":
+				perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt 9999999999
+			#else:
+				perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -o 1 -d "$dataMatrix_in" -v "$variableMetadata_in"  -rt "$rt_cond.rt_threshold"
+			#end if
+		#else:
+			#if str($mass_file.liste.mass_list)=="true":
+				## Mass differences list uploaded by the user
+				#if str($rt_cond.rt_choice)=="true":
+					perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in"  -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold"
+				#end if
+				#if str($rt_cond.rt_choice)=="false":
+					perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -m "$mass_file.liste.mass_file_in" -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in"  -mass "$mass_file.mass_threshold"
+				#end if
+			#else:
+					## Default mass differences list shipped with the tool
+					#if str($rt_cond.rt_choice)=="true":
+						perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -m "$__tool_directory__/data/default_list.csv" -o 2 -d "$dataMatrix_in" -v "$variableMetadata_in" -rt "$rt_cond.rt_threshold" -mass "$mass_file.mass_threshold"
+					#end if
+					#if str($rt_cond.rt_choice)=="false":
+						perl "$__tool_directory__/Analytic_correlation_filtration.pl" -f "$file_in" -m "$__tool_directory__/data/default_list.csv" -o 3 -d "$dataMatrix_in" -v "$variableMetadata_in" -mass "$mass_file.mass_threshold"
+					#end if
+			#end if
+		#end if
+
+		-r "$repres_opt.repres_opt_selector"
+
+		#if str($repres_opt.repres_opt_selector)=="max_intensity_max_mass":
+			-IT $repres_opt.int_threshold
+			-IP $repres_opt.int_percentage
+		#end if
+		-correl "$correl_threshold"
+		-output_sif "$sif_out"
+		-output_tabular "$variableMetadata_out"
+
+	]]></command>
+
+	<inputs>
+		<param type="data" name="file_in" format="txt" help="The .txt similarity table (you can obtain it by using the Between-table Correlation tool or for exemple the cor() function in R) " label="Correlation table file" />
+		<param type="data" name="dataMatrix_in" format="tabular" help="" label="dataMatrix file" />
+		<param type="data" name="variableMetadata_in" format="tabular" help="" label="variableMetadata file" />
+
+		<param help="Define the minimum similarity threshold accepted to determine analytic correlation" label="Correlation threshold" type="float" name="correl_threshold" value="0.90"/>
+
+		<conditional name="mass_file">
+		  <param name="mass_choice" checked="true" falsevalue="false" help="'YES' if you want to take it into account; 'NO' if you don't want to take into account mass information" label="Do you want to take into account mass differences between 2 ions?" truevalue="true" type="boolean"/>
+				<when value="true">
+					<conditional name="liste">
+						<param name="mass_list" checked="true" falsevalue="false" help="'YES' if you have your own list to upload; 'NO' if you want to use a default list" label="Do you have your own list of mass differences or do you want to use a default list ?" truevalue="true" type="boolean"/>
+						<when value="false">
+
+						</when>
+						<when value="true">
+							<param type="data" name="mass_file_in" format="tabular,csv" help="The file containing all your report and known mass differences (cf help for file example) " label="Mass differences table (format: tabular or csv) " />
+						</when>
+					</conditional>
+					<param help="2 ions need to have a difference mass included in the list at +/- mass difference range to be considered as analytically correlated | Value recommendation : 0.005" label="Mass difference range" type="float" name="mass_threshold" value="0.005"/>
+				</when>
+				<when value="false">
+
+				</when>
+		</conditional>
+
+		<conditional name="rt_cond">
+			<param checked="true" falsevalue="false" help="'YES' if want to take into account retention time information; 'NO' if you don't want to take into account retention time information" label="Do you want to take into account retention time differences between 2 ions? " name="rt_choice" truevalue="true" type="boolean"/>
+				<when value="true">
+					<param help="Choose a retention time difference threshold between 2 ions considered as analytically correlated | Value recommendation : 0.1" label="Retention time difference threshold" type="float" name="rt_threshold" value="0.1"/>
+				</when>
+				<when value="false">
+
+				</when>
+		</conditional>
+
+		<conditional name="repres_opt">
+			<param name="repres_opt_selector" label="Which representative ion do you want to select for each group" type="select" display="radio" help="">
+				<option value="intensity">Highest intensity</option>
+				<option value="mass">Highest mass</option>
+				<option value="mixt">Highest (mass2 x intensity) </option>
+				<option value="max_intensity_max_mass">Highest mass between the 3 highest intensity (following intensity threshold and rules ==> see help) </option>
+			</param>
+			<when value="max_intensity_max_mass">
+				<param help="" label="Minimum intensity threshold for the representative ion" type="float" name="int_threshold" value="1000"/>
+				<param help="Example: ion A has the highest intensity of a group but not the highest mass, and ion B has the second highest intensity of the group and a higher mass than A. For B to be chosen as the representative ion of the group, its intensity needs to be at least 50% of the intensity of A." label="Fraction of the highest intensity of the group accepted for the new representative ion. This option helps to avoid selecting an isotope. " type="float" name="int_percentage" value="0.5"/>
+			</when>
+			<when value="intensity">
+			</when>
+			<when value="mass">
+			</when>
+			<when value="mixt">
+			</when>
+		</conditional>
+
+	</inputs>
+
+	<outputs>
+		<data format="sif" label="${file_in.name}_sif" name="sif_out"/>
+		<data format="tabular" label="${variableMetadata_in.name}_representative_ion" name="variableMetadata_out"/>
+	</outputs>
+
+	<help><![CDATA[
+
+.. class:: infomark
+
+**Contact** : **Stephanie Monnerie**, **Estelle Pujos-Guillot**
+
+---------------------------------------------------
+
+.. class:: infomark
+
+**References** :
+
+---------------------------------------------------
+
+-----------
+Input files
+-----------
+
++-----------------------------------------+---------------+
+| File                                    |     Format    |
++=========================================+===============+
+| 1)  Similarity matrix                  |  txt          |
++-----------------------------------------+---------------+
+| 2)  Data matrix                         |  tabular      |
++-----------------------------------------+---------------+
+| 3)  Variable metadata                   |  tabular      |
++-----------------------------------------+---------------+
+| **Optional file**                       |   **Format**  |
++-----------------------------------------+---------------+
+| 4)  Optional : Mass differences list    |  csv/tabular  |
++-----------------------------------------+---------------+
+
+---------------------------------------------------
+
+-------------
+Files content
+-------------
+
+Similarity matrix
+	* File organisation : one line per similarity pair, with the first ion ID, the similarity value and the second ion ID, tab-separated ==> First_Ion_ID \\t Similarity_Value \\t Second_Ion_ID
+	* Example:
+
+.. image:: similarity_matrix.JPG
+	:width: 800
+
+Data matrix file
+	* "variable x sample" **dataMatrix** : tabular separated file of the numeric data matrix, with . as decimal, and NA for missing values; the table must not contain metadata apart from row and column names; the row and column names must be identical to the rownames of the variable metadata (see below)
+
+Variable metadata file
+	* "variable x metadata" **variableMetadata** tabular separated file of the numeric and/or character variable metadata, with . as decimal and NA for missing values
+
+.. class:: warningmark
+
+For more information about input files, refer to the corresponding "W4M HowTo" page:
+http://workflow4metabolomics.org/sites/workflow4metabolomics.org/files/files/w4m_TableFormatForGalaxy_150908.pdf
+
+
+Mass differences list
+	* A file containing list of known adducts, fragments or isotopes with the mass differences linked to them
+	* Example:
+
+.. image:: Adduct_fragment_list.JPG
+	:width: 350
+
+---------------------------------------------------
+
+----------
+Parameters
+----------
+
+Take into account mass differences between 2 ions :
+	* You can enter a list of mass differences that are known. The file must be organized with a first column for the mass difference type (isotope, fragment, etc...), a second column with the mass difference chemical formula (H+, -2H+K, etc...) and a third column for the mass difference value
+	* If you are choosing to use a mass differences table, you have to choose a mass difference range that will be a threshold to accept or not a difference value as true (recognize a mass difference value in the file +/- this threshold).
+
+Take into account retention time :
+	* You can use retention time as a criterion to group ions. You have to choose a value that will be used as an interval : 2 ions are grouped when their retention times are equal +/- the threshold.
+
+Choose the representative ion for each group, there are 4 possibilities to determine the representative ion :
+	* The ion with the highest intensity (recommended for LC/MS)
+	* The ion with the highest mass
+	* The ion with the highest "mass2 * intensity" value
+	* The ion with the highest mass among the 3 most intense ions of the group, except if the intensity of the highest mass ion is lower than a given percentage (for example 50%) of the intensity of the most intense ion (recommended for GC/MS)
+
+
+---------------------------------------------------
+
+--------------
+Example of use
+--------------
+
+For UPLC/HRMS data, default parameters can be the following:
+	* If a Pearson correlation is used, the default threshold can be set at 0.90
+	* A delta RT of 0.1 min or adjusted depending on chromatographic systems
+	* The use of the list of known adduct/isotope mass differences with a mass delta of 0.005 Da or adjusted depending on MS resolution
+	* The choice of the ion with the highest intensity as the representative ion.
+For GC/HRMS dataset, we recommend to use the same parameters but ignoring the list of mass difference and to choose the ion with the highest mass among the top highest intensity as representative.
+
+
+
+	]]></help>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/data/default_list.csv	Fri Oct 18 04:59:51 2019 -0400
@@ -0,0 +1,225 @@
+adduit	-2H+Na+K	59.9378259
+adduit	H	1.007825032
+adduit	-H+K	37.95588165
+adduit	-H+Na	21.98194425
+adduit	-3H+3Na	65.94583274
+adduit	-4H+4K	151.8235266
+adduit	-4H+4Na	87.92777699
+adduit	-3H+3K	113.8676449
+adduit	-2H+2K	75.9117633
+adduit	-2H+2Na	43.9638885
+adduit	2H 	2.015650064
+adduit	Cl 	34.96885268
+adduit	-2H+Ca 	37.94694092
+isotope	13C db 	0.501677419
+isotope	13C	1.003354838
+isotope	15N	0.997034893
+isotope	18O	2.00424638
+isotope	34S	1.9957959
+isotope	41K	1.99811908
+isotope	37Cl	1.99704991
+isotope	13C2	2.006709676
+isotope	13C3	3.010064513
+isotope	13C+37Cl	3.000404748
+isotope	13C+18O	3.007601218
+isotope	13C+34S	2.999150738
+isotope	44Ca	3.99289082
+adduit	CH3OH	32.02621475
+adduit	CH3CN	41.0265491
+adduit	H2O	18.01056468
+adduit	2(H2O)	36.02112937
+adduit	NaCl 	57.95862196
+adduit	HCOOH	46.0054793
+adduit	+(HCOOH)+(HCOOK)	129.9668403
+adduit	+(HCOOH)+(HCOONa)	113.9929029
+adduit	+(HCOOH)+2(HCOONa)	181.9803264
+adduit	HCOOK	83.96136095
+adduit	+(HCOOK)+(HCOONa)	151.9487845
+adduit	HCOONa	67.98742355
+adduit	2(HCOOH)	92.01095861
+adduit	+2(HCOOH)+(HCOOK)	175.9723196
+adduit	+2(HCOOH)+(HCOONa)	159.9983822
+adduit	2(HCOOK)	167.9227219
+adduit	2(HCOONa)	135.9748471
+fragment	C11H18O9	294.0950822
+fragment	C12H16O12	352.064176
+fragment	C12H20O9	308.1107322
+fragment	C2H2O	42.01056468
+fragment	C2H3.	27.0229265
+fragment	C2H3N	41.0265491
+fragment	C2H3NO3	89.01129296
+fragment	C2H3O.	43.01784112
+fragment	C2H4	28.03130013
+fragment	C2H4N.	42.03382553
+fragment	C2H4O	44.02621475
+fragment	C2H5.	29.03857656
+fragment	C2H5N	43.04219916
+fragment	C2H5NO2	75.0320284
+fragment	C2H5O.	45.03349118
+fragment	C2H5O6P	155.9823745
+fragment	C2H6	30.04695019
+fragment	C2H7N	45.05784922
+fragment	C2HNO2	71.00072827
+fragment	C3H4O3	88.01604399
+fragment	C3H5.	41.03857656
+fragment	C3H5NO2	87.0320284
+fragment	-(C3H5O2NS)-(NH3)	136.0306485
+fragment	C3H5O2NS	119.0040994
+fragment	C3H6	42.04695019
+fragment	C3H6O3	90.03169405
+fragment	C3H7.	43.05422662
+fragment	C3H7O2N	89.04767846
+fragment	C3H7O2NS	121.0197495
+fragment	C3H7O6P	169.9980246
+fragment	C4H6	54.04695019
+fragment	C4H6O2	86.03677943
+fragment	C4H6O4	118.0266087
+fragment	C4H7.	55.05422662
+fragment	C4H8O3	104.0473441
+fragment	C4H9	57.07042529
+fragment	C5H7O3N	129.0425931
+fragment	C5H8O3NS	162.0224891
+fragment	C5H8O4	132.0422587
+fragment	C6H10O4	146.0579088
+fragment	-(C6H10O5)-(H2O)	180.0633881
+fragment	C6H10O5	162.0528234
+fragment	C6H10O7	194.0426527
+fragment	C6H8O6	176.032088
+fragment	CH2O	30.01056468
+fragment	-(CH2S)-(HCOOH)	91.99320037
+fragment	-(CH2S)-(NH3)	63.01427016
+fragment	CH2S	45.98772106
+fragment	CH3.	15.0229265
+fragment	CH3COO.	59.01275574
+fragment	CH3COOH	60.02112937
+fragment	CH3N	29.0265491
+fragment	CH3O.	31.01784112
+fragment	CH3OH	32.02621475
+fragment	CH4	16.03130013
+fragment	CH4N.	30.03382553
+fragment	-(CH4S)-(HCOOH)	94.00885043
+fragment	-(CH4S)-(NH3)	65.02992022
+fragment	CH4S	48.00337113
+fragment	CH5N	31.04219916
+fragment	Cl.	34.96830408
+fragment	CO	27.99491462
+fragment	-(CO2)-(CO)	71.98474386
+fragment	CO2	43.98982924
+fragment	-(H2)-(NH3)	19.04219916
+fragment	H2	2.015650064
+fragment	-(H2O)-(CO2)	62.00039392
+fragment	-(H2O)-(HCOOH)	64.01604399
+fragment	-(H2O)-(NH3)	35.03711378
+fragment	H2O	18.01056468
+fragment	-(H2O)-2(CO2)	105.9902232
+fragment	-(H2S)-(H2O)	51.99828575
+fragment	H2S	33.98772106
+fragment	H2SO4	97.96737954
+fragment	H3PO4	97.97689521
+fragment	HCl	35.97667771
+fragment	HCN	27.01089903
+fragment	-(HCOOH)-(HCN)	73.01637834
+fragment	HCOOH	46.0054793
+fragment	HS.	32.97934743
+fragment	-(NC3H9)-(CH3COOH)	119.0946287
+fragment	-(NC3H9)-(H2O)	77.08406397
+fragment	-(NC3H9)-(HCOOH)	105.0789786
+fragment	NC3H9	59.07349929
+fragment	NaCl	57.95862196
+fragment	NH2CO.	44.01309008
+fragment	-(NH3)-(CO2)-(H2O)	79.02694302
+fragment	-(NH3)-(CO2)	61.01637834
+fragment	-(NH3)-(CONH)	60.03236275
+fragment	-(NH3)-(HCOOH)	63.0320284
+fragment	NH3	17.0265491
+fragment	NH3CO	45.02146372
+fragment	NHCO	43.00581365
+fragment	OH.	17.00219105
+fragment	PO3	78.95850549
+fragment	SO2	63.96190024
+fragment	SO3	79.95681486
+fragment	-2(H2O)-(CO2)	80.01095861
+fragment	-2(H2O)-(HCOOH)-(NH3)	99.05315777
+fragment	-2(H2O)-(HCOOH)	82.02660867
+fragment	2(H2O)	36.02112937
+fragment	2(HCOOH)	92.01095861
+fragment	-2(NH3)-(CO)-(CO2)	106.0378421
+fragment	-2(NH3)-(CO)	62.04801281
+fragment	2(NH3)	34.05309819
+fragment	3(H2O)	54.03169405
+fragment	3(NH3)	51.07964729
+fragment	4(H2O)	72.04225874
+fragment	C10H11O3N5	249.0861892
+fragment	C10H13O4N5	267.0967539
+fragment	C10H14O7N5P	347.0630844
+fragment	C10H15O5N5	285.1073186
+fragment	C2H3NO2	73.01637834
+fragment	C2H4O2	60.02112937
+fragment	C2H5NO3	91.02694302
+fragment	C2H6O2	62.03677943
+fragment	C2H6O3	78.03169405
+fragment	-(C2H6O3)-(H2O)	96.04225874
+fragment	C2H6O4	94.02660867
+fragment	C2H7NO2	77.04767846
+fragment	C3H10O5	126.0528234
+fragment	-(C3H6O3)-(CHNO)	133.0375077
+fragment	C3H6O4	106.0266087
+fragment	C3H8O3	92.04734412
+fragment	C3H8O4	108.0422587
+fragment	C4H10O5	138.0528234
+fragment	C4H5NO3	115.026943
+fragment	C4H8O4	120.0422587
+fragment	C5H10O4	134.0579088
+fragment	C5H13O4N	151.0844579
+fragment	C6H11O4N	161.0688078
+fragment	C6H11O5N	177.0637225
+fragment	C6H13O5N	179.0793725
+fragment	C5H10O5	150.0528234
+fragment	C5H10O6	166.047738
+fragment	C5H12O2	104.0837296
+fragment	-(C5H12O2)-(H2O)	122.0942943
+fragment	C5H5N5	135.0544952
+fragment	C5H5ON5	151.0494098
+fragment	C5H6O2	98.03677943
+fragment	C5H7O2N5	169.0599745
+fragment	-(C5H7O3N)-(CO2)	173.0324223
+fragment	-(C5H7O3N)-(H2O)	147.0531578
+fragment	C5H8N3	110.0718223
+fragment	C5H8O3	116.0473441
+fragment	C5H8O5N5P	249.026305
+fragment	C5H9O3	117.0551691
+fragment	C5H9O6P	196.0136746
+fragment	C5H9O7P	212.0085893
+fragment	C6H10O3	130.0629942
+fragment	-(C6H10O3)-(H2O)	148.0735589
+fragment	C6H11O4N3PS	252.0207885
+fragment	C6H11O4NPS	224.0146405
+fragment	C6H12O5	164.0684735
+fragment	C6H14O6	182.0790382
+fragment	C6H14O7	198.0739528
+fragment	C6H16O7	200.0896029
+fragment	C6H16O8	216.0845175
+fragment	C6H8N3	122.0718223
+fragment	C6H8NS	126.0377453
+fragment	C7H5ON5	175.0494098
+fragment	C7H6ON6	190.0603088
+fragment	C7H7O2N5	193.0599745
+fragment	C7H11O6N	205.0586371
+fragment	C8H14O7	222.0739528
+fragment	C8H5O3N5	219.039239
+fragment	C8H7O4N5	237.0498037
+fragment	C9H10O4N2	210.0640568
+fragment	C9H11O3N3	209.0800412
+fragment	C9H11O4N3	225.0749558
+fragment	C9H12O5N2	228.0746215
+fragment	C9H12O6N3P	289.0463717
+fragment	C9H13O4N3	227.0906059
+fragment	C9H14O7N3P	307.0569364
+fragment	C9H16O8	252.0845175
+fragment	CH2N2	42.02179806
+fragment	-(CH2O)-(H2O)	48.02112937
+fragment	CH5NO	47.03711378
+fragment	-(H3PO4)-(CHNO)	140.9827089
+fragment	-(H3PO4)-(H2O)	115.9874599
+fragment	-(H3PO4)-(NH3)	115.0034443
+fragment	HPO3	79.96633052
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ACF/lib/IonFiltration.pm	Fri Oct 18 04:59:51 2019 -0400
@@ -0,0 +1,181 @@
+#!usr/bin/perl
+package IonFiltration;
+
+### Perl modules
+use strict;
+use warnings;
+
+
+
+
+
+
+########################################################################
+### Creation of a hash containing all adducts and fragments possible ###
+########################################################################
+
+
+# Read a table of known mass differences.
+# Each line is split on tabulations or semicolons and is expected to hold,
+# in this order: the mass difference type, its chemical formula and its value.
+#
+# Argument : $_[0] - path of the mass difference file.
+# Returns  : a hash, shaped $hmass{formula}{value} = type. It is empty when
+#            no file is given or when the file cannot be opened.
+# Output   : one line on STDOUT for every value that was already met on a
+#            previous line of the file.
+sub MassCollecting{
+
+	my $mass_file = $_[0];
+	my %hmass;
+	my %seen_value; # mass difference values already read, whatever the formula
+
+	# The mass list appears to be optional for the caller (the '-m' mandatory
+	# check is commented out in the main script), so a missing or unreadable
+	# file is reported and gives an empty hash instead of stopping the run.
+	unless(defined($mass_file) && length($mass_file)){
+		warn "No mass difference file given\n";
+		return %hmass;
+	}
+
+	my $fh;
+	unless(open($fh, '<', $mass_file)){
+		warn "Impossible to open $mass_file : $!\n";
+		return %hmass;
+	}
+
+	while(my $line = <$fh>){
+		chomp $line;
+		$line =~ s/\r$//; # files saved with Windows line endings
+		my @tline = split(/[\t;]/, $line);
+
+		# Blank or incomplete lines carry no usable mass difference
+		next if(scalar(@tline) < 3);
+
+		# %hmass is indexed by formula first, so it cannot tell whether a
+		# value was already read : a dedicated hash is used for that.
+		if($seen_value{$tline[2]}){
+			print "The mass difference already exists : $tline[2] !\n";
+		}
+		$seen_value{$tline[2]}=1;
+		$hmass{$tline[1]}{$tline[2]}=$tline[0];
+	}
+
+	close $fh;
+	return %hmass;
+
+}
+
+
+
+
+
+
+
+########################################################
+### Creation of a sif table + correlation filtration ###
+########################################################
+
+
+# Write the sif table of the correlated ion pairs and collect the m/z and
+# retention time of every ion.
+#
+# Arguments (positional ; only the ones listed here are read) :
+#	$_[0] : tabular correlation matrix. Its first line gives the ion name of
+#	        each column and the first field of the other lines names the row.
+#	$_[1] : path of the sif file to write.
+#	$_[5] : correlation threshold ; a pair is written only when its
+#	        coefficient is strictly greater than this value.
+#	$_[8] : tabular file with a header line, whose first column is the ion
+#	        name. m/z is read from column "mzmed" (or "mz" when absent) and
+#	        retention time from column "rtmed" (or "rt" when absent).
+# Returns : ($output_sif, %hrtmz), with $hrtmz{ion}{mz} and $hrtmz{ion}{rt}.
+# Dies when a file name is missing or when a file cannot be opened, and when
+# the sif file cannot be closed properly.
+sub sifTableCreation{
+
+	my ($file, $output_sif, $correl_threshold, $combined_DMVM) = @_[0, 1, 5, 8];
+
+	# A three-argument open() on an undefined name opens an anonymous
+	# temporary file instead of failing, so names are checked beforehand.
+	foreach my $path ($file, $output_sif, $combined_DMVM){
+		unless(defined($path) && length($path)){
+			die "Impossible to open a file : no file name given\n";
+		}
+	}
+
+
+	##########################################
+	### m/z and retention time of each ion ###
+	##########################################
+
+	my %hrtmz;
+	my ($mz_col, $rt_col);
+	my $meta_header_read = 0;
+
+	open(my $fh_meta, '<', $combined_DMVM) or die "Impossible to open $combined_DMVM\n";
+	while(my $line = <$fh_meta>){
+		chomp $line;
+		$line =~ s/\r$//; # otherwise the last column name/value keeps a CR
+		my @tline = split(/\t/, $line);
+		next unless(@tline); # empty line
+
+		if(!$meta_header_read){
+			my %hheader_line;
+			for(my $i=0; $i<scalar(@tline); $i++){
+				$hheader_line{$tline[$i]}=$i;
+			}
+			# Columns are located once, "mzmed"/"rtmed" being preferred.
+			# NOTE(review): when neither name is present the index stays
+			# undefined, exactly as in the previous implementation.
+			$mz_col = defined($hheader_line{mzmed}) ? $hheader_line{mzmed} : $hheader_line{mz};
+			$rt_col = defined($hheader_line{rtmed}) ? $hheader_line{rtmed} : $hheader_line{rt};
+			$meta_header_read = 1;
+			next;
+		}
+
+		$hrtmz{$tline[0]}{mz}=$tline[$mz_col];
+		$hrtmz{$tline[0]}{rt}=$tline[$rt_col];
+	}
+	close $fh_meta;
+
+
+	###############################
+	### Creation of a sif table ###
+	###############################
+
+	open(my $fh_cor, '<', $file) or die "Impossible to open $file\n";
+	open(my $fh_sif, '>', $output_sif) or die "Impossible to open $output_sif\n";
+
+	my @column_ion; # column index -> ion name, taken from the first line
+	my %seen_pair;  # $seen_pair{ionA}{ionB} = 1 once the pair was examined
+	my $cor_header_read = 0;
+
+	while(my $line = <$fh_cor>){
+		chomp $line;
+		$line =~ s/\r$//;
+		my @tline = split(/\t/, $line);
+		next unless(@tline); # empty line
+
+		if(!$cor_header_read){
+			@column_ion = @tline;
+			$cor_header_read = 1;
+			next;
+		}
+
+		my $row_ion = $tline[0];
+
+		for(my $i=1; $i<scalar(@tline); $i++){
+			my $col_ion = $column_ion[$i];
+			my $coef = $tline[$i];
+
+			# Correlation of an ion with itself
+			next if($row_ion eq $col_ion);
+
+			#########################
+			### Remove duplicates ###
+			#########################
+
+			# A pair is examined only once, whatever its orientation. A nested
+			# hash is used rather than a joined "A/B" key, which mixed up
+			# distinct pairs when ion names themselves contain a "/".
+			next if(exists($seen_pair{$row_ion}) && $seen_pair{$row_ion}{$col_ion});
+			next if(exists($seen_pair{$col_ion}) && $seen_pair{$col_ion}{$row_ion});
+			$seen_pair{$row_ion}{$col_ion}=1;
+
+			# Negative correlations are not considered (no abs() on $coef)
+			if($coef > $correl_threshold){
+				print $fh_sif "$row_ion\t$coef\t$col_ion\n";
+			}
+		}
+	}
+	close $fh_cor;
+	# Buffered write errors only show up when the handle is closed
+	close($fh_sif) or die "Impossible to write $output_sif\n";
+
+	return ($output_sif, %hrtmz);
+}
+
+
+
+
+
+1;
\ No newline at end of file
Binary file ACF/static/images/Adduct_fragment_list.JPG has changed
Binary file ACF/static/images/similarity_matrix.JPG has changed