# Mercurial > repos > fgiacomoni > hmdb_ms_search

package lib::hmdb ;

use strict;
use warnings ;
use Exporter ;
use Carp ;

use LWP::Simple;
use LWP::UserAgent;
use WWW::Mechanize qw();
use URI::URL;
use SOAP::Lite;
use Encode qw(encode_utf8);
use HTML::Template ;
use XML::Twig ;
use Text::CSV ;

use Data::Dumper ;

## Package globals used by Exporter. They are pre-declared with "use vars" and
## then declared again with "our" just below.
use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);

our $VERSION = "1.0";
## This package inherits from Exporter.
our @ISA = qw(Exporter);
## Names exported by default into any package that does "use lib::hmdb".
our @EXPORT = qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass );
## The same list of names, available through the ":ALL" import tag.
our %EXPORT_TAGS = ( ALL => [qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] );

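=head1 NAME

lib::hmdb - Query builder, web client and result parsers for the HMDB MS search

=head1 SYNOPSIS

    use lib::hmdb ;
    my $hmdb = lib::hmdb->new() ;
    my ( $sublists ) = $hmdb->extract_sub_mz_lists( $masses, $HMDB_LIMITS ) ;
    my ( $hmdb_masses, $nb_masses ) = $hmdb->prepare_multi_masses_query( $sublists->[0] ) ;
    my ( $csv, $status ) = $hmdb->getMatchesFromHmdb5WithUA( $hmdb_masses, $delta, $mode, $adducts ) ;
    my ( $results ) = $hmdb->parseHmdb5CSVResults( $csv, $masses, $max_query ) ;

=head1 DESCRIPTION

This module prepares lists of masses (m/z) for the HMDB MS search,
sends the queries with a web user agent and parses the returned CSV
into one list of candidate entries per query mass.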

=head1 METHODS

Methods are :

=head2 METHOD new

	## Description : new
	## Input : $self
	## Output : bless $self ;
	## Usage : new() ;

=cut

sub new {
    ## Constructor.
    ## Input  : the class name (or an existing object) when called as a method ;
    ##          nothing when called as a plain function (legacy usage).
    ## Output : a new empty hash-based object blessed into the requested class.
    my ( $class ) = @_ ;

    ## Fall back on this package so that the legacy call "new()" keeps working.
    $class = ref($class) || $class || __PACKAGE__ ;

    ## Two-argument bless : a subclass calling SUPER::new gets an object of its own class.
    my $self = {} ;
    return bless( $self, $class ) ;
}
### END of SUB


=head2 METHOD extract_sub_mz_lists

	## Description : split a long mz list into several sublists of at most $HMDB_LIMITS masses each
	## Input : $masses, $HMDB_LIMITS
	## Output : $sublists
	## Usage : my ( $sublists ) = extract_sub_mz_lists( $masses, $HMDB_LIMITS ) ;

=cut
## START of SUB
sub extract_sub_mz_lists {
    ## Split a list of masses into consecutive sublists of at most $HMDB_LIMITS masses.
    ## Input  : $masses      - array reference of masses
    ##          $HMDB_LIMITS - maximum number of masses per sublist (>= 1)
    ## Output : reference to an array of array references (the sublists, in input order)
    ## Dies   : when the mass list is missing or empty, or when the limit is not >= 1
    my $self = shift ;
    my ( $masses, $HMDB_LIMITS ) = @_ ;

    if ( ( !defined $masses ) or ( scalar( @{$masses} ) == 0 ) ) {
        die "The provided mzs list is empty" ;
    }

    ## A limit below 1 can never be reached by filling a sublist ; without this
    ## guard the chunking would never terminate.
    if ( ( !defined $HMDB_LIMITS ) or ( $HMDB_LIMITS < 1 ) ) {
        croak "The maximum number of mzs per sublist must be a positive number" ;
    }
    my $chunk_size = int( $HMDB_LIMITS ) ;

    ## Undefined, empty and zero masses are ignored. Filtering first guarantees
    ## that no empty sublist is produced when such a value ends the list.
    my @valid_mzs = grep { $_ } @{$masses} ;

    my @sublists = () ;
    while ( scalar( @valid_mzs ) > 0 ) {
        my @sublist = splice( @valid_mzs, 0, $chunk_size ) ;
        push( @sublists, \@sublist ) ;
    }
    return( \@sublists ) ;
}
## END of SUB

=head2 METHOD prepare_multi_masses_query

	## Description : Generate the adapted format of the mz list for HMDB
	## Input : $masses
	## Output : $hmdb_masses
	## Usage : my ( $hmdb_masses ) = prepare_multi_masses_query( $masses ) ;

=cut
## START of SUB
sub prepare_multi_masses_query {
    ## Build the mass list string of a HMDB query : the masses joined by an
    ## URL-encoded carriage return + line feed.
    ## Input  : $masses - array reference of masses
    ## Output : ( $hmdb_masses, $nb_masses ) - the formatted string and the number of masses in it
    ## Croaks : when the list is undefined, empty or holds 700 masses or more
    my $self = shift ;
    my ( $masses ) = @_ ;

    my $sep = '%0D%0A' ; ## URL-encoded CR LF

    if ( !defined $masses ) {
        croak "No mass list found \n" ;
    }

    ## Single declaration of the counter : the returned value is the real number
    ## of masses (a second, inner declaration used to hide it and 0 was returned).
    my @masses = @{$masses} ;
    my $nb_masses = scalar( @masses ) ;

    if ( $nb_masses == 0 ) { croak "The input method parameter mass list is empty" ; }
    elsif ( $nb_masses >= 700 ) { croak "Your mass list is too long : HMDB allows maximum 700 query masses per request \n" ; } ## Del it --- temporary patch

    ## The separator is placed between the masses only, never after the last one.
    my $hmdb_masses = join( $sep, @masses ) ;

    return( $hmdb_masses, $nb_masses ) ;
}
## END of SUB


=head2 METHOD prepareAdductListFormat

	## Description : prepare a adduct list well formatted for https queries
	## Input : $adductString
	## Output : $formattedAdductString
	## Usage : my ( $formattedAdductString ) = prepareAdductListFormat ( $adductString ) ;

=cut
## START of SUB
sub prepareAdductListFormat {
    ## Turn a comma-separated adduct list into the form used in the https queries.
    ## Input  : $adductString - e.g. 'M+H,2M+H'
    ## Output : ( $formattedAdductString, $nbAdducts ) - e.g. ( 'M%2BH%202M%2BH', 2 ) ;
    ##          ( undef, 0 ) plus a warning when no adduct string is given
    my ( $self, $adductString ) = @_ ;

    unless ( defined $adductString ) {
        warn "\t[WARN]the adduct type is not defined...It will set to 'Unknown'\n" ;
        return ( undef, 0 ) ;
    }

    ## Count the selected adducts on the raw string, before any conversion.
    my @selectedAdducts = split( /,/, $adductString ) ;
    my $nbAdducts = scalar( @selectedAdducts ) ;

    ## '+' becomes %2B, ',' becomes an encoded space (%20), '-' is left as is.
    ( my $formattedAdductString = $adductString ) =~ s/\+/%2B/g ;
    $formattedAdductString =~ s/,/%20/g ;

    return ( $formattedAdductString, $nbAdducts ) ;
}
### END of SUB

=head2 METHOD test_matches_from_hmdb_ua DEPRECATED

	## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra.
	## Input : none
	## Output : $status_line
	## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ;

=cut
## START of SUB
sub test_matches_from_hmdb_ua {
    ## [DEPRECATED] Send one fixed test query to the specdb CSV search and report
    ## the HTTP status of the answer.
    ## Input  : none
    ## Output : reference to the numeric part of the response status line
    ## Side effect : the whole response is printed on STDOUT
    my ( $self ) = @_ ;

    my $agent = LWP::UserAgent->new() ;
    $agent->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0") ;

    my @headers = (
        'Connection'   => 'keep-alive',
        'Content-Type' => 'application/x-www-form-urlencoded',
        'Referer'      => 'http://www.hmdb.ca/spectra/ms/search',
        'Accept'       => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    ) ;
    my $request = HTTP::Request->new( 'POST', 'http://specdb.wishartlab.com/ms/search.csv', \@headers ) ;

    ## Fixed test parameters : one mass, positive mode, tight tolerance.
    $request->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV') ;

    my $response = $agent->request( $request ) ;
    print $response->as_string ;

    ## Keep only the first run of digits of the status line (the HTTP code).
    my ( $http_code ) = ( $response->status_line =~ /(\d+)/ ) ;

    return ( \$http_code ) ;
}
## END of SUB


=head2 METHOD checkHmdbUrlEntries

	## Description : check a list of HMDB_IDs by testing what is the status of their uri (HTTP 200 or not)
	## Input : $urlRoot, $entries
	## Output : $validEntries
	## Usage : my ( $validEntries ) = checkHmdbUrlEntries ( $entries ) ;

=cut
## START of SUB
sub checkHmdbUrlEntries {
    ## Keep, for every query mass, the first $maxQuery entries whose HMDB page can be fetched.
    ## Input  : $urlRoot  - url prefix to which the entry id is appended
    ##          $clusters - array reference ; one array reference of entry hashes per query mass
    ##          $maxQuery - maximum number of entries kept per query mass
    ## Output : reference to an array of array references (same layout as $clusters),
    ##          holding only the entries for which get() returned a true value
    my $self = shift ;
    my ( $urlRoot, $clusters, $maxQuery ) = @_ ;
    my @ValidResults = () ;

    foreach my $resultsByMass ( @{$clusters} ) {
        my @validFeature = () ;

        foreach my $feature ( @{$resultsByMass} ) {

            ## Quota reached : stop BEFORE downloading another page that would be thrown away.
            last if ( scalar( @validFeature ) + 1 > $maxQuery ) ;

            ## Entries without id cannot be checked and are never kept.
            next unless ( $feature->{ENTRY_ENTRY_ID} ) ;

            my $url = $urlRoot.$feature->{ENTRY_ENTRY_ID} ;
            if ( get($url) ) {
                push ( @validFeature, $feature ) ;
            }
        } # END FOREACH FEATURE

        push ( @ValidResults, \@validFeature ) ;
    } # END FOREACH RESULTS

    return ( \@ValidResults ) ;
}
### END of SUB


=head2 METHOD testMatchesFromHmdbWithUA

	## Description : [DEPRECATED] test a single query with tests parameters on hmdb - get the status of the complete server infra.
	## Input : none
	## Output : $status_line
	## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ;

=cut
## START of SUB
sub testMatchesFromHmdbWithUA {
    ## [DEPRECATED] Check the HMDB search service : read the search page to pick
    ## up the CSRF token, then post one fixed test query (3 attempts at most).
    ## Input  : none
    ## Output : reference to the HTTP status of the last POST (0 when none was sent)
    ## Dies   : when the search page answers 200 but holds no csrf-token
    my ( $self ) = @_ ;

    ## based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
    my $mech = WWW::Mechanize->new(
        agent     => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:93.0) Gecko/20100101 Firefox/93.0' ,
        autocheck => 0,
    ) ;

    my $csrftoken      = undef ;
    my $statusPostLine = 0 ;

    ## Receive cookies and the authentication (CSRF) token.
    my $searchPage = $mech->get("http://www.hmdb.ca/spectra/ms/search") ;

    if ( $mech->status() == 200 ) {
        die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
            unless ( $searchPage->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/ ) ;
        $csrftoken = $1 ;
        print "\nTOKEN: $csrftoken\n" ;

        ## Headers sent with every following request of this agent.
        my @headers = (
            [ "X-CSRFToken",               $csrftoken ],
            [ 'Host',                      'specdb.wishartlab.com' ],
            [ 'Connection',                'keep-alive' ],
            [ 'Upgrade-Insecure-Requests', '1' ],
            [ 'Content-Type',              'application/x-www-form-urlencoded' ],
            [ 'Accept-Language',           'en-US,en;q=0.5' ],
            [ 'Accept-Encoding',           'gzip, deflate' ],
            [ 'Origin',                    'null' ],
            [ 'DNT',                       '1' ],
            [ 'Referer',                   'https://hmdb.ca/spectra/ms/search' ],
            [ 'Accept',                    'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8' ],
        ) ;
        foreach my $header ( @headers ) {
            $mech->add_header( $header->[0], $header->[1] ) ;
        }
    }

    ## POST test : stop at the first 200, give up after the third try.
    for my $attempt ( 1 .. 3 ) {
        last if ( $statusPostLine == 200 ) ;

        print "\tTesting HMDB server connexion ($attempt time(s) )...\n" ;
        ## A failing request must not stop the loop.
        eval {
            $mech->post(
                "http://specdb.wishartlab.com/ms/search.csv",
                Content => 'utf8=TRUE&authenticity_token='.$csrftoken.'&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV'
            ) ;
        } ;
        $statusPostLine = $mech->status() ;
        print "Status: $statusPostLine" ;
    }

    return ( \$statusPostLine ) ;
}
## END of SUB


=head2 METHOD testMatchesFromHmdb5WithUA

	## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra (API V5.0 compliant).
	## Input : none
	## Output : $status_line
	## Usage : my ( $status_line ) = testMatchesFromHmdb5WithUA( ) ;

=cut
## START of SUB
sub testMatchesFromHmdb5WithUA {
    ## Check the HMDB (API v5.0) search service : read the search page to pick up
    ## the CSRF token, then request the CSV of one fixed test query (3 attempts at most).
    ## Input  : none
    ## Output : reference to the HTTP status of the last test request (0 when none was sent)
    ## Dies   : when the search page answers 200 but holds no csrf-token
    my ( $self ) = @_ ;

    my $mech = WWW::Mechanize->new(
        agent     => 'wonderbot for W4M 3.0',
        autocheck => 1,
        timeout   => 2400,
    ) ;

    my $statusPostLine = 0 ;

    ## Receive cookies and the authentication (CSRF) token.
    my $searchPage = $mech->get("https://www.hmdb.ca/spectra/ms/search") ;

    if ( $mech->status() == 200 ) {
        die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
            unless ( $searchPage->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/ ) ;
        my $csrftoken = $1 ;

        my @headers = (
            [ "X-CSRFToken",  $csrftoken ],
            [ 'Connection',   'keep-alive' ],
            [ 'Content-Type', 'application/x-www-form-urlencoded' ],
            [ 'Referer',      'https://www.hmdb.ca/spectra/ms/search' ],
            [ 'Accept',       'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' ],
        ) ;
        foreach my $header ( @headers ) {
            $mech->add_header( $header->[0], $header->[1] ) ;
        }
    }

    ## Fixed test query, already url-encoded (results[...] parameters).
    my $testUrl = 'https://hmdb.ca/spectra/ms/generate_csv.csv?'.join( '&',
        'results%5Baction%5D=search',
        'results%5Badduct_type%5D%5B%5D=M%2BH%202M%2BH',
        'results%5Bccs_predictors%5D=',
        'results%5Bccs_tolerance%5D=',
        'results%5Bcommit%5D=Search',
        'results%5Bcontroller%5D=specdb%2Fms',
        'results%5Bms_search_ion_mode%5D=positive',
        'results%5Bquery_masses%5D=125.0089',
        'results%5Btolerance%5D=0.001',
        'results%5Btolerance_units%5D=Da',
        'results%5Butf8%5D=%E2%9C%93',
    ) ;

    ## Stop at the first 200, give up after the third try.
    for my $attempt ( 1 .. 3 ) {
        last if ( $statusPostLine == 200 ) ;

        print "\tTesting HMDB server connexion ($attempt time(s) )...\n" ;
        ## The eval keeps a failing request from stopping the loop.
        eval {
            $mech->get( $testUrl ) ;
        } ;
        $statusPostLine = $mech->status() ;
        print "Status: $statusPostLine" ;
    }

    return ( \$statusPostLine ) ;
}
## END of SUB

=head2 METHOD check_state_from_hmdb_ua

	## Description : check the http status of hmdb and stop the script cleanly if necessary.
	## Input : $status
	## Output : none
	## Usage : check_state_from_hmdb_ua($status) ;

=cut
## START of SUB
sub check_state_from_hmdb_ua {
    ## Stop the tool with an explicit message unless the HMDB test request answered 200.
    ## Input  : $status - reference to the HTTP status code
    ## Output : 1 (after printing two information lines) when the status is 200
    ## Croaks : when the status is undefined or different from 200
    my ( $self, $status ) = @_ ;

    if ( !defined $$status ) {
        croak "No https status is defined for the distant server" ;
    }

    if ( $$status != 200 ) {
        ## Codes with a dedicated message, tested in this order.
        my @documented_errors = (
            [ 502, "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ],
            [ 504, "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ],
            [ 500, "Internal Server Error (500): The HMDB server returns an unexpected internal server error. The Hmdb tool is stopped with error." ],
        ) ;
        foreach my $error ( @documented_errors ) {
            croak $error->[1] if ( $$status == $error->[0] ) ;
        }
        ## Any other code gets the generic message.
        croak "Internal Server Error $$status..." ;
    }

    print "\tThe HMDB server returns that your request (connexion test) was fulfilled\n" ;
    print "\tAll searches should be sent successfully to HMDB...(Set verbose to \"High\" for more information!)\n" ;

    return (1) ;
}
## END of SUB

=head2 METHOD get_matches_from_hmdb_ua DEPRECATED

	## Description : [DEPRECATED]HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
	## Input : $mass, $delta, $mode
	## Output : $results
	## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;

=cut
## START of SUB
sub get_matches_from_hmdb_ua {
    ## [DEPRECATED] Former HMDB query through LWP::UserAgent.
    ## The method is disabled : it only emits a warning and returns an empty result.
    ## Input  : $masses, $delta, $mode (accepted for compatibility, not used)
    ## Output : ( [], 500 ) - an empty page and a 500 status
    ##
    ## The former request code stood after the unconditional return and could
    ## never run ; it has been removed (see the repository history if needed).
    my $self = shift ;
    my ( $masses, $delta, $mode ) = @_ ;

    ## Added May, 2022
    warn "[DEPRECATED Methode] method get_matches_from_hmdb_ua is deprecated and not compatible with HMDB 4.0" ;
    return ([], 500) ;
}
## END of SUB

=head2 METHOD getMatchesFromHmdbWithUA

	## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
	## Input : $mass, $delta, $mode, adducts
	## Output : $results
	## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ;

=cut
## START of SUB
sub getMatchesFromHmdbWithUA {
    ## [DEPRECATED] Former HMDB query through WWW::Mechanize (POST on search.csv).
    ## The method is disabled : it only emits a warning and returns an empty result.
    ## Input  : $masses, $delta, $mode, $adducts (accepted for compatibility, not used)
    ## Output : ( [], 500 ) - an empty page and a 500 status
    ##
    ## The former request code stood after the unconditional return and could
    ## never run ; it has been removed (see the repository history if needed).
    my $self = shift ;
    my ( $masses, $delta, $mode, $adducts ) = @_ ;

    ## Added May, 2022
    warn "[DEPRECATED Methode] method getMatchesFromHmdbWithUA is deprecated and not compatible with HMDB 5.0" ;
    return ([], 500) ;
}
## END of SUB

=head2 METHOD getMatchesFromHmdb5WithUA

	## Description : HMDB v5.0 querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
	## Input : $mass, $delta, $mode, adducts
	## Output : $results
	## Usage : my ( $results, $status ) = getMatchesFromHmdb5WithUA( $masses, $delta, $mode, $adducts ) ;

=cut
## START of SUB
sub getMatchesFromHmdb5WithUA {
    ## Query the HMDB v5.0 MS search and fetch the results as CSV lines.
    ## Input  : $masses  - mass list, inserted as is in the url
    ##          $delta   - tolerance (sent with units "Da")
    ##          $mode    - ion mode, inserted as is in the url
    ##          $adducts - adduct list, inserted as is in the url ; 'Unknown' when undefined or empty
    ## Output : ( $page, $status ) - reference to the array of CSV lines and 'OK',
    ##          or an empty array reference and the HTTP status when the request failed
    ## Dies   : when the search page answers 200 but holds no csrf-token
    ## NOTE(review): the agent is built with autocheck => 1, so a failed request
    ## presumably raises inside get() and the failure branch below is never reached - confirm.
    my ( $self, $masses, $delta, $mode, $adducts ) = @_ ;

    ## based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
    my $mech = WWW::Mechanize->new(
        agent     => 'wonderbot for W4M 3.0',
        autocheck => 1,
        timeout   => 2400,
    ) ;

    ## Receive cookies and the authentication (CSRF) token.
    my $searchPage = $mech->get("https://www.hmdb.ca/spectra/ms/search") ;

    if ( $mech->status() == 200 ) {
        die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
            unless ( $searchPage->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/ ) ;
        my $csrftoken = $1 ;

        my @headers = (
            [ "X-CSRFToken",  $csrftoken ],
            [ 'Connection',   'keep-alive' ],
            [ 'Content-Type', 'application/x-www-form-urlencoded' ],
            [ 'Referer',      'https://www.hmdb.ca/spectra/ms/search' ],
            [ 'Accept',       'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' ],
        ) ;
        foreach my $header ( @headers ) {
            $mech->add_header( $header->[0], $header->[1] ) ;
        }
    }

    ## adduct format is adduct_type=M%2BH%202M%2BH
    unless ( ( defined $adducts ) and ( $adducts ne '' ) ) {
        $adducts = 'Unknown' ;
    }

    my $queryUrl = 'https://hmdb.ca/spectra/ms/generate_csv.csv?'.join( '&',
        'results%5Baction%5D=search',
        'results%5Badduct_type%5D%5B%5D='.$adducts,
        'results%5Bccs_predictors%5D=',
        'results%5Bccs_tolerance%5D=',
        'results%5Bcommit%5D=Search',
        'results%5Bcontroller%5D=specdb%2Fms',
        'results%5Bms_search_ion_mode%5D='.$mode,
        'results%5Bquery_masses%5D='.$masses,
        'results%5Btolerance%5D='.$delta,
        'results%5Btolerance_units%5D=Da',
        'results%5Butf8%5D=%E2%9C%93',
    ) ;

    my $res = $mech->get( $queryUrl ) ;

    unless ( $mech->success ) {
        my $httpStatus = $mech->status() ;
        warn "\t[HMDB service issue !! the server returned a $httpStatus HTTP error]" ;
        return ( [], $httpStatus ) ;
    }

    ## One element per line of the returned CSV.
    my @page = split ( /\n/, $res->decoded_content ) ;

    return ( \@page, 'OK' ) ;
}
## END of SUB


=head2 METHOD parseHmdb5CSVResults

	## Description : parse the csv results and get data - API 5.0 compliant
	## Input : $csv
	## Output : $results
	## Usage : my ( $results ) = parseHmdb5CSVResults( $csv ) ;

=cut
## START of SUB
sub parseHmdb5CSVResults {
    ## Parse the CSV lines returned by HMDB (format v5.0 - May 2022) and group the
    ## entries by query mass.
    ## Input  : $csv       - array reference of CSV lines
    ##          $masses    - array reference of query masses ; each one must be spelled
    ##                       exactly like the query_mass column to get its entries
    ##          $max_query - maximum number of entries kept per query mass
    ## Output : reference to an array holding, for each mass of $masses and in the same
    ##          order, a reference to the array of its entries (hashes with the keys
    ##          ENTRY_ENTRY_ID, ENTRY_ENTRY_NAME, ENTRY_FORMULA, ENTRY_CPD_MZ, ENTRY_ADDUCT,
    ##          ENTRY_ADDUCT_TYPE, ENTRY_ADDUCT_MZ, ENTRY_DELTA) ; an empty array when
    ##          nothing was found for the mass
    ## Side effect : the header notice and every parsed line are printed on STDOUT
    my $self = shift ;
    my ( $csv, $masses, $max_query ) = @_ ;

    my %result_by_entry = () ;

    foreach my $line (@{$csv}) {

        ## Header : query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,"delta(ppm),",ccs_value
        if ( $line =~ /query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,"delta\(ppm\),",ccs_value/ ) {
            print "Header detected...Parsing is starting...\n" ;
            next ;
        }

        ## The adduct field is "anything but a comma" : adducts written with a minus
        ## sign (M-H, M+Na-2H, M+H-H2O...) were rejected by the former character
        ## class and their rows were silently lost.
        my @entry = ( $line =~ /(\d+\.\d+),(\w+),(.*),(\w+),(\d+\.\d+),([^,]+)\s*,(\+|\-),(\d+\.\d+),(\d+),(\d*)/ ) ;

        ## Lines that do not look like a result row are ignored.
        next unless ( scalar( @entry ) > 0 ) ;

        print "$line\n" ;

        my %features = (
            ENTRY_ENTRY_ID    => $entry[1],
            ENTRY_ENTRY_NAME  => $entry[2],
            ENTRY_FORMULA     => $entry[3],
            ENTRY_CPD_MZ      => $entry[4],
            ENTRY_ADDUCT      => $entry[5],
            ENTRY_ADDUCT_TYPE => $entry[6],
            ENTRY_ADDUCT_MZ   => $entry[7],
            ENTRY_DELTA       => $entry[8],
        ) ;

        ## Entries are stored under their query mass, in the order sent by HMDB.
        push ( @{ $result_by_entry{$entry[0]} }, \%features ) ;
    } ## end foreach

    ## manage per query_mzs (keep query masses order by array)
    my @results = () ;
    foreach my $mass (@{$masses}) {

        my @kept_entries = () ;

        if ( $result_by_entry{$mass} ) {
            ## cut all entries > $max_query - all entries were already sorted...by hmdb
            foreach my $entry ( @{ $result_by_entry{$mass} } ) {
                last if ( scalar( @kept_entries ) + 1 > $max_query ) ;
                push ( @kept_entries, $entry ) ;
            }
        }
        push ( @results, \@kept_entries ) ;
    }
    return(\@results) ;
}
## END of SUB


=head2 METHOD parse_hmdb_csv_results

	## Description : [DEPRECATED] parse the csv results and get data
	## Input : $csv
	## Output : $results
	## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;

=cut
## START of SUB
sub parse_hmdb_csv_results {
    ## [DEPRECATED] Parse the CSV lines returned by HMDB (format v4.0 - dec 2017) and
    ## group the entries by query mass.
    ## Input  : $csv       - array reference of CSV lines
    ##          $masses    - array reference of query masses (spelled like the query_mass column)
    ##          $max_query - maximum number of entries kept per query mass
    ## Output : reference to an array holding, for each mass of $masses and in the same
    ##          order, a reference to the array of its entries (empty when nothing was found)
    my ( $self, $csv, $masses, $max_query ) = @_ ;

    my %hits_by_query_mz = () ;

    LINE:
    foreach my $line ( @{$csv} ) {

        ## Skip the header line.
        next LINE if ( $line =~ /query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,delta\(ppm\)/ ) ;

        ## Fields : query mass, id, name, kegg id, formula, compound mass, adduct, adduct type, adduct mass, delta.
        ## Lines that do not match are ignored.
        my @fields = ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a|\s+]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) ;
        next LINE unless ( scalar( @fields ) > 0 ) ;

        ## The kegg id (fourth field) is not kept.
        my %hit = (
            ENTRY_ENTRY_ID    => $fields[1],
            ENTRY_ENTRY_NAME  => $fields[2],
            ENTRY_FORMULA     => $fields[4],
            ENTRY_CPD_MZ      => $fields[5],
            ENTRY_ADDUCT      => $fields[6],
            ENTRY_ADDUCT_TYPE => $fields[7],
            ENTRY_ADDUCT_MZ   => $fields[8],
            ENTRY_DELTA       => $fields[9],
        ) ;
        push ( @{ $hits_by_query_mz{ $fields[0] } }, \%hit ) ;
    }

    ## One result list per query mass, in the order of $masses.
    my @results = () ;
    foreach my $query_mz ( @{$masses} ) {

        my @kept = () ;

        if ( $hits_by_query_mz{$query_mz} ) {
            ## Keep the first $max_query entries only (entries come already sorted by hmdb).
            foreach my $hit ( @{ $hits_by_query_mz{$query_mz} } ) {
                last if ( scalar( @kept ) + 1 > $max_query ) ;
                push ( @kept, $hit ) ;
            }
        }
        push ( @results, \@kept ) ;
    }
    return(\@results) ;
}
## END of SUB

=head2 METHOD parse_hmdb_page_results

	## Description : [DEPRECATED] old HMDB html page parser
	## Input : $page
	## Output : $results
	## Usage : my ( $results ) = parse_hmdb_page_result( $pages ) ;

=cut
## START of SUB
sub parse_hmdb_page_results {
	## [DEPRECATED] Line-by-line parser of the old HMDB html result page.
	## Input  : $page - array reference of html lines
	## Output : reference to an array with one array reference per <table>...</table>
	##          section ; each one holds the entry hashes collected in that section.
	## The parser is a small state machine : the five <td> cells following a
	## metabolite link are read in a fixed order (name, adduct, adduct mass,
	## compound mass, delta), so the order of the tests below matters.
	## Retrieve Values
    my $self = shift ;
    my ( $page ) = @_ ;

    my @results = () ;
    ## $catch_table : 1 while the current line is inside a <table> section.
    ## $catch_name  : 0 while <td> cells are being collected, 1 once an entry is complete.
    my ($catch_table, $catch_name) = (0, 0) ;
    ## Cells of the entry being read ; undef means "not read yet".
    my ($name, $adduct, $adduct_mw, $cpd_mw, $delta) = (undef, undef, undef, undef, undef) ;

    if ( defined $page ) {

    	my @page = @{$page} ;
    	my $ID = undef ;
    	my @result_by_mz = () ;
    	my %result_by_entry = () ;

		foreach my $line (@page)   {

			# Section of the page holding the results
			if( $line =~/<table>/ ) { $catch_table = 1 ; }

			## If a result exists :
			if($catch_table == 1) {

			    # Id of the molecule, taken from the link to its metabolite page
			    if( $line =~ /<a href=\"\/metabolites\/(\w+)\" (.*)>/ )  {
			    	$ID = $1 ;
			    	$catch_name = 0 ;
			    	next ;
			    }
			    # Cells of the molecule : each <td> line fills the first field still undefined
			    if ( $catch_name == 0 ) {

			    	if( $line =~ /<td>(.+)<\/td>/ ) {

			    		if ( !defined $name ) {
			    			$name = $1 ;
			    			$result_by_entry{'ENTRY_ENTRY_ID'} = $ID ;
					    	$result_by_entry{'ENTRY_NAME'} = $name ;
					    	next ;
			    		}
			    		if ( !defined $adduct ) { $adduct = $1 ;  $result_by_entry{'ENTRY_ADDUCT'} = $adduct ; next ; }
			    		if ( !defined $adduct_mw ) {  $adduct_mw = $1 ; $result_by_entry{'ENTRY_ADDUCT_MZ'} = $adduct_mw ; next ; 	}
			    		if ( !defined $cpd_mw ) { $cpd_mw = $1 ; $result_by_entry{'ENTRY_CPD_MZ'} = $cpd_mw ; next ; 	}
			    		if ( !defined $delta ) {
			    			## Last cell : the entry is complete, store a copy and reset the state.
			    			$delta = $1 ;
			    			$result_by_entry{'ENTRY_DELTA'} = $delta ;
			    			$catch_name = 1 ;
			    			my %tmp = %result_by_entry ;
			    			push (@result_by_mz, \%tmp) ;
			    			%result_by_entry = () ;
			    			( $name, $cpd_mw, $delta, $adduct, $adduct_mw ) = ( undef, undef, undef, undef, undef ) ;
			    			next ;
			    		}
				    }
			    }
			}
			# End of the section holding the results : store a copy of the entries of this table
			if( $line =~ /<\/table>/ ) {
				$catch_table = 0 ;
				my @Tmp = @result_by_mz ;
				push(@results, \@Tmp) ;
				@result_by_mz = () ;
			}
	    }
    }
    return(\@results) ;
}
## END of SUB


=head2 METHOD get_unik_ids_from_results

	## Description : get all unik ids from the hmdb result object
	## Input : $results
	## Output : $ids, $idsNumber
	## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ;

=cut
## START of SUB
sub get_unik_ids_from_results {
    ## Collect the distinct entry ids found in a hmdb result object.
    ## Input  : $results - array reference of array references of entry hashes
    ## Output : ( $ids, $idsNumber ) - reference to a hash whose keys are the distinct
    ##          ENTRY_ENTRY_ID values (each mapped to 1) and the number of these keys
    my ( $self, $results ) = @_ ;

    my %seen_ids = () ;

    for my $hits ( @{$results} ) {
        for my $hit ( @{$hits} ) {
            ## Missing, empty or otherwise false ids are not recorded.
            my $id = $hit->{'ENTRY_ENTRY_ID'} ;
            next unless ( $id ) ;
            $seen_ids{$id} = 1 ;
        }
    }

    my $idsNumber = scalar( keys %seen_ids ) ;
    return ( \%seen_ids, $idsNumber ) ;
}
### END of SUB


=head2 METHOD get_hmdb_metabocard_from_id

	## Description : get a metabocard (xml format from an ID on HMDB)
	## Input : $ids
	## Output : $metabocard_features
	## Usage : my ( $metabocard_features ) = get_hmdb_metabocard_from_id ( $ids ) ;

=cut
## START of SUB
sub get_hmdb_metabocard_from_id {
    ## Request the metabocard (xml) of every id and keep its name, inchi and ALOGPS logp.
    ## Input  : $ids      - hash ref, the HMDB ids are its keys
    ##          $hmdb_url - prefix of the card url ; the card is requested at $hmdb_url.$id.'.xml'
    ## Output : \%metabocard_features, keyed by id :
    ##            STATUS           => 'EXISTING' or 'NOT_EXISTING'
    ##            metabolite_name  => text of metabolite/name
    ##            metabolite_inchi => text of metabolite/inchi
    ##            metabolite_logp  => value of the predicted property with kind 'logp' and source 'ALOGPS'
    ##          An id whose card is reachable but contains none of these elements gets no key.
    ## Retrieve Values
    my $self = shift ;
    my ( $ids, $hmdb_url ) = @_ ;
    my %metabocard_features = () ;

    ## $ids is a hash ref : its content has to be tested. A numeric comparison of the
    ## reference itself is always true and never reports an empty id list.
    if ( ( ref($ids) eq 'HASH' ) and ( scalar( keys %{$ids} ) > 0 ) ) {

        foreach my $id ( keys %{$ids} ) {
            print "$id...\n" ;

            if ( !defined $hmdb_url ) {
                warn "\tThe hmdb metabocard url is not defined\n" ;
                last ;
            }

            my $query = $hmdb_url.$id.'.xml' ;

            ## test the header if exists : an undefined value means that the HEAD request failed
            my $response = head($query) ;

            if ( !defined $response ) {
                $metabocard_features{$id}{'STATUS'} = 'NOT_EXISTING' ;
                $metabocard_features{$id}{'metabolite_name'} = undef ;
                $metabocard_features{$id}{'metabolite_inchi'} = undef ;
                $metabocard_features{$id}{'metabolite_logp'} = undef ;
                ## Need to be improve to manage http 404 or other response diff than 200
                next ;
            }
            next if ( !$response->is_success ) ;

            my $twig = XML::Twig->nparse_ppe(

                twig_handlers => {
                    # metabolite name
                    'metabolite/name' => sub {
                        $metabocard_features{$id}{'metabolite_name'} = $_->text_only ;
                        $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;
                    },
                    # metabolite inchi
                    'metabolite/inchi' => sub {
                        $metabocard_features{$id}{'metabolite_inchi'} = $_->text_only ;
                        $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;
                    },
                    ## metabolite logP
                    'metabolite/predicted_properties/property' => sub {

                        my ( $kind, $source, $value ) = ( undef, undef, undef ) ;

                        ## read all the fields first : the decision must not depend on
                        ## the order of the kind / value / source children
                        foreach my $field ( $_->children ) {
                            my $field_name = $field->name ;
                            if    ( $field_name eq 'kind' )   { $kind   = $field->text ; }
                            elsif ( $field_name eq 'source' ) { $source = $field->text ; }
                            elsif ( $field_name eq 'value' )  { $value  = $field->text ; }
                        }

                        if ( ( defined $kind ) and ( defined $source ) and ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) {
                            $metabocard_features{$id}{'metabolite_logp'} = $value ;
                            $metabocard_features{$id}{'STATUS'} = 'EXISTING' ;
                        }
                    },
                },
                pretty_print => 'indented',
                error_context => 1, $query
            ) ;

            $twig->purge ;
        }
    } ## End IF defined ids
    else {
        warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ;
    }

    return (\%metabocard_features) ;
}
### END of SUB


=head2 METHOD map_suppl_data_on_hmdb_results

	## Description : map supplementary data with already collected results with hmdb search - delete the entry if hmdb card doesn't exist...
	## Input : $results, $features
	## Output : $results
	## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ;

=cut
## START of SUB
sub map_suppl_data_on_hmdb_results {
    ## Complete each result entry with the data collected from its metabocard.
    ## Input  : $results  - array ref (one item per query) of array refs of entry hash refs
    ##          $features - hash ref keyed by HMDB id (STATUS, metabolite_name, metabolite_inchi, metabolite_logp)
    ## Output : array ref with the same first level ; the entry hashes are completed in place.
    ##   - STATUS 'EXISTING'     : name / inchi / logp are copied ('UNKNOWN' / 'NA' / 'NA' when undefined)
    ##   - STATUS 'NOT_EXISTING' : the entry is removed
    ##   - no feature for the id : inchi and logp are set to 'NONEDATA'
    ##   - entry without id      : the entry is removed
    my ( $self, $results, $features ) = @_ ;
    my @mapped_results ;

    for my $hits ( @{$results} ) {

        my @kept_hits ;

        HIT: for my $hit ( @{$hits} ) {

            my $hmdb_id = $hit->{'ENTRY_ENTRY_ID'} ;
            next HIT unless ( $hmdb_id and $hmdb_id ne '' ) ;

            my $card = $features->{"$hmdb_id"} ;

            if ( $card and ( $card->{STATUS} eq 'EXISTING' ) ) {
                $hit->{'ENTRY_ENTRY_NAME'}  = defined $card->{'metabolite_name'}  ? $card->{'metabolite_name'}  : 'UNKNOWN' ;
                $hit->{'ENTRY_ENTRY_INCHI'} = defined $card->{'metabolite_inchi'} ? $card->{'metabolite_inchi'} : 'NA' ;
                $hit->{'ENTRY_ENTRY_LOGP'}  = defined $card->{'metabolite_logp'}  ? $card->{'metabolite_logp'}  : 'NA' ;
            }
            elsif ( $card and ( $card->{STATUS} eq 'NOT_EXISTING' ) ) {
                ## the card does not exist on HMDB : drop the entry
                next HIT ;
            }
            else {
                ## no feature available for this id
                $hit->{'ENTRY_ENTRY_INCHI'} = 'NONEDATA' ;
                $hit->{'ENTRY_ENTRY_LOGP'}  = 'NONEDATA' ;
            }
            push @kept_hits, $hit ;
        }

        push @mapped_results, \@kept_hits ;
    }

    return \@mapped_results ;
}
### END of SUB


=head2 METHOD set_html_tbody_object

	## Description : initializes and build the tbody object (perl array) needed to html template
	## Input : $nb_pages, $nb_items_per_page
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ;

=cut
## START of SUB
sub set_html_tbody_object {
    ## Build the skeleton of the html tbody object : one hash per page, numbered from 1,
    ## each with an empty MASSES list.
    ## Input  : $nb_pages, $nb_items_per_page (the latter is not used here)
    ## Output : array ref of { PAGE_NB => n, MASSES => [] }
    my ( $self, $nb_pages, $nb_items_per_page ) = @_ ;

    my @tbody_object ;
    my $page_nb = 1 ;

    while ( $page_nb <= $nb_pages ) {
        push @tbody_object, { PAGE_NB => $page_nb, MASSES => [] } ;
        $page_nb++ ;
    }
    return \@tbody_object ;
}
## END of SUB

=head2 METHOD add_mz_to_tbody_object

	## Description : initializes and builds the mz objects (perl hashes) of each page, as needed by the html template
	## Input : $tbody_object, $nb_items_per_page, $mz_list, $ids_list
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) ;

=cut
## START of SUB
sub add_mz_to_tbody_object {
    ## Distribute the queried masses over the pages of the tbody object.
    ## Input  : $tbody_object      - array ref of page hashes (see set_html_tbody_object)
    ##          $nb_items_per_page - number of mass slots of each page
    ##          $mz_list           - array ref of the queried masses
    ##          $ids_list          - array ref of the ids of the queried masses (same order)
    ## Output : $tbody_object (same reference) ; every page received in MASSES one hash per mass :
    ##          MASSES_ID_QUERY, MASSES_MZ_QUERY, MZ_COLOR, MASSES_NB (1-based rank) and an empty ENTRIES list.
    my $self = shift ;
    my ( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) = @_ ;

    my @colors = ( 'white', 'green' ) ;
    my $mz_index = 0 ; ## position in $mz_list, shared by all the pages

    foreach my $page ( @{$tbody_object} ) {

        my $icolor = 0 ; ## each page starts with the first color

        for ( my $slot = 1 ; $slot <= $nb_items_per_page ; $slot++ ) {

            if ( exists $mz_list->[$mz_index] ) {

                my %mz = (
                    # mass feature
                    MASSES_ID_QUERY => $ids_list->[$mz_index],
                    MASSES_MZ_QUERY => $mz_list->[$mz_index],
                    MZ_COLOR => $colors[$icolor],
                    MASSES_NB => $mz_index + 1,
                    ENTRIES => [] ,
                ) ;
                push ( @{ $page->{MASSES} }, \%mz ) ;
                # alternate the row color for the next mass of the page
                $icolor = 1 - $icolor ;
            }
            ## a slot is consumed even when no mass is left
            $mz_index++ ;
        } ## foreach slot
    }
    return($tbody_object) ;
}
## END of SUB

=head2 METHOD add_entries_to_tbody_object

	## Description : initializes and build the entries object (perl array) needed to html template
	## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries
	## Output : $tbody_object
	## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ;

=cut
## START of SUB
sub add_entries_to_tbody_object {
    ## Fill the ENTRIES list of every mass of the tbody object.
    ## Input  : $tbody_object, $nb_items_per_page, $mz_list (both unused here), $entries
    ##          $entries is an array ref with one array ref of entry hashes per mass, in the
    ##          order of the masses across all the pages.
    ## Output : $tbody_object (same reference)
    ## An ENTRY_ENTRY_ID already seen for the mass (or equal to 'N/A') is skipped.
    ## A mass without any entry receives one placeholder row.
    my ( $self, $tbody_object, $nb_items_per_page, $mz_list, $entries ) = @_ ;

    ## fields copied unchanged from an entry to its html row
    my @copied_fields = qw(
        ENTRY_ENTRY_NAME ENTRY_ENTRY_ID ENTRY_FORMULA ENTRY_CPD_MZ ENTRY_ADDUCT
        ENTRY_ADDUCT_TYPE ENTRY_ADDUCT_MZ ENTRY_DELTA ENTRY_ENTRY_INCHI ENTRY_ENTRY_LOGP
    ) ;

    my $flat_mz_index = 0 ; ## rank of the mass, all pages together

    for my $page ( @{$tbody_object} ) {

        for my $mass ( @{ $page->{MASSES} } ) {

            my @seen_ids = ('N/A') ;
            my $nb_hits = 0 ;

            for my $hit ( @{ $entries->[$flat_mz_index] } ) {
                $nb_hits++ ;

                ## duplicate filter on the entry id
                my $is_duplicate = grep { $_ eq $hit->{ENTRY_ENTRY_ID} } @seen_ids ;
                next if $is_duplicate ;
                push @seen_ids, $hit->{ENTRY_ENTRY_ID} ;

                my %row = (
                    ENTRY_COLOR => $mass->{MZ_COLOR},
                    ENTRY_ENTRY_ID2 => $hit->{ENTRY_ENTRY_ID},
                    ( map { ( $_ => $hit->{$_} ) } @copied_fields ),
                ) ;
                push @{ $mass->{ENTRIES} }, \%row ;
            }

            if ( $nb_hits == 0 ) {
                ## no hit at all for this mass : placeholder row
                my %placeholder = (
                    ENTRY_COLOR => $mass->{MZ_COLOR},
                    ENTRY_ENTRY_NAME  => 'UNKNOWN',
                    ENTRY_ENTRY_ID => 'NONE',
                    ENTRY_ENTRY_ID2 => '',
                    ENTRY_FORMULA => 'n/a',
                    ENTRY_CPD_MZ => 'n/a',
                    ENTRY_ADDUCT => 'n/a',
                    ENTRY_ADDUCT_TYPE => 'n/a',
                    ENTRY_ADDUCT_MZ => 'n/a',
                    ENTRY_DELTA => 0,
                    ENTRY_ENTRY_INCHI => 'n/a',
                    ENTRY_ENTRY_LOGP => 'n/a',
                ) ;
                push @{ $mass->{ENTRIES} }, \%placeholder ;
            }
            $flat_mz_index++ ;
        }
    }
    return $tbody_object ;
}
## END of SUB

=head2 METHOD write_html_skel

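	## Description : prepare and write the html output file
	## Input : $html_file_name (scalar ref), $html_object, $pages, $search_condition, $html_template, $js_path, $css_path
	## Output : $html_file_name (scalar ref)
	## Usage : my ( $html_file_name ) = write_html_skel( $html_file_name, $html_object, $pages, $search_condition, $html_template, $js_path, $css_path ) ;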

=cut
## START of SUB
sub write_html_skel {
    ## Fill the html template and write the result in the output file.
    ## Input  : $html_file_name   - scalar ref holding the path of the output file
    ##          $html_object      - value of the PAGES template parameter
    ##          $pages            - value of the PAGES_NB template parameter
    ##          $search_condition - value of the CONDITIONS template parameter
    ##          $html_template    - path of the HTML::Template file
    ##          $js_path, $css_path - values of JS_GALAXY_PATH and CSS_GALAXY_PATH
    ## Output : scalar ref holding the path of the written file
    ## Dies (croak) when the output name is undefined or the template does not exist,
    ## and (die) when the output file cannot be created, written or closed.
    ## Retrieve Values
    my $self = shift ;
    my ( $html_file_name,  $html_object, $pages , $search_condition, $html_template, $js_path, $css_path ) = @_ ;

    my $html_file = $$html_file_name ;

    if ( !defined $html_file ) {
        croak "No output file name available to write HTML file\n" ;
    }

    ## the template is checked and filled before the output file is opened, so that a
    ## missing template does not leave an empty (truncated) output file behind
    if ( ( !defined $html_template ) or ( !-e $html_template ) ) {
        my $template_label = defined $html_template ? $html_template : '' ;
        croak "Can't fill any html output : No template available ($template_label)\n" ;
    }

    my $ohtml = HTML::Template->new(filename => $html_template);
    $ohtml->param(  JS_GALAXY_PATH => $js_path, CSS_GALAXY_PATH => $css_path  ) ;
    $ohtml->param(  CONDITIONS => $search_condition  ) ;
    $ohtml->param(  PAGES_NB => $pages  ) ;
    $ohtml->param(  PAGES => $html_object  ) ;

    ## three-argument open on a lexical handle : the file name is never parsed as a mode
    open ( my $html_fh, '>', $html_file ) or die "Can't create the output file $html_file: $!" ;
    print {$html_fh} $ohtml->output or die "Can't write in the output file $html_file: $!" ;
    ## buffered write errors only show up when the handle is closed
    close ($html_fh) or die "Can't close the output file $html_file: $!" ;

    return(\$html_file) ;
}
## END of SUB

=head2 METHOD set_lm_matrix_object

	## Description : build the hmdb_row under its ref form
	## Input : $header, $init_mzs, $entries
	## Output : $hmdb_matrix
	## Usage : my ( $hmdb_matrix ) = set_lm_matrix_object( $header, $init_mzs, $entries ) ;

=cut
## START of SUB
sub set_lm_matrix_object {
    ## Build a one-column matrix : an optional header row, then one row per queried mass.
    ## Input  : $header   - label of the column (no header row when undefined)
    ##          $init_mzs - array ref of the queried masses (only its length is used)
    ##          $entries  - array ref, one array ref of entry hashes per mass
    ## Output : array ref of one-cell rows. A cell is
    ##            delta::(formula)::id|delta::(formula)::id|...
    ##          or 'NONE' when the mass has no entry to display.
    ## An ENTRY_ENTRY_ID already seen for the mass (or equal to 'N/A') is skipped.
    ## Retrieve Values
    my $self = shift ;
    my ( $header, $init_mzs, $entries ) = @_ ;

    my @hmdb_matrix = () ;

    if ( defined $header ) {
        push @hmdb_matrix, [ $header ] ;
    }

    my $index_mz = 0 ;

    foreach my $mz ( @{$init_mzs} ) {

        ## a mass without any entry list is handled like a mass with an empty list
        my $hits = ( ref( $entries->[$index_mz] ) eq 'ARRAY' ) ? $entries->[$index_mz] : [] ;

        my %seen_id = ( 'N/A' => 1 ) ;
        my @cells = () ;

        foreach my $hit ( @{$hits} ) {

            ## duplicate filter on the entry id
            my $hmdb_id = $hit->{ENTRY_ENTRY_ID} ;
            my $id_key = defined $hmdb_id ? $hmdb_id : '' ;
            next if ( $seen_id{$id_key}++ ) ;

            ## METLIN data display model
            ## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|...
            push @cells, $hit->{ENTRY_DELTA}.'::('.$hit->{ENTRY_FORMULA}.')::'.$hmdb_id ;
        }

        ## the separator is added by join only : no trailing pipe is left when the last
        ## entries of the list are skipped duplicates
        my $cluster_col = scalar(@cells) ? join( '|', @cells ) : 'NONE' ;

        push ( @hmdb_matrix, [ $cluster_col ] ) ;
        $index_mz++ ;
    }
    return(\@hmdb_matrix) ;
}
## END of SUB

=head2 METHOD set_hmdb_matrix_object_with_ids

	## Description : build the hmdb_row under its ref form (IDS only)
	## Input : $header, $init_mzs, $entries
	## Output : $hmdb_matrix
	## Usage : my ( $hmdb_matrix ) = set_hmdb_matrix_object_with_ids( $header, $init_mzs, $entries ) ;

=cut
## START of SUB
sub set_hmdb_matrix_object_with_ids {
    ## Build a one-column matrix : an optional header row, then one row per queried mass.
    ## Input  : $header   - any defined value adds the header row ; its text is always
    ##                      replaced by 'hmdb(delta::name::mz::formula::adduct::id)'
    ##          $init_mzs - array ref of the queried masses (only its length is used)
    ##          $entries  - array ref, one array ref of entry hashes per mass
    ## Output : array ref of one-cell rows. A cell is
    ##            delta::[name]::mz::formula::[adduct]::id | delta::[name]::... | ...
    ##          or 'NONE' when the mass has no entry to display.
    ## An ENTRY_ENTRY_ID already seen for the mass (or equal to 'N/A') is skipped.
    ## A false field is replaced by its default : 0 (delta, id), 'UNKNOWN' (name), 'N/A' (others).
    ## Retrieve Values
    my $self = shift ;
    my ( $header, $init_mzs, $entries ) = @_ ;

    my @hmdb_matrix = () ;

    if ( defined $header ) {
        ## redefined the header hmdb(delta::name::mz::formula::adduct::id)
        push @hmdb_matrix, [ 'hmdb(delta::name::mz::formula::adduct::id)' ] ;
    }

    my $index_mz = 0 ;

    foreach my $mz ( @{$init_mzs} ) {

        ## a mass without any entry list is handled like a mass with an empty list
        my $hits = ( ref( $entries->[$index_mz] ) eq 'ARRAY' ) ? $entries->[$index_mz] : [] ;

        my %seen_id = ( 'N/A' => 1 ) ;
        my @cells = () ;

        foreach my $hit ( @{$hits} ) {

            ## duplicate filter on the entry id
            my $raw_id = $hit->{ENTRY_ENTRY_ID} ;
            my $id_key = defined $raw_id ? $raw_id : '' ;
            next if ( $seen_id{$id_key}++ ) ;

            my $hmdb_name    = $hit->{ENTRY_ENTRY_NAME} || 'UNKNOWN' ;
            my $hmdb_id      = $raw_id                  || 0 ;
            my $hmdb_formula = $hit->{ENTRY_FORMULA}    || 'N/A' ;
            my $hmdb_cpd_mz  = $hit->{ENTRY_CPD_MZ}     || 'N/A' ;
            my $hmdb_adduct  = $hit->{ENTRY_ADDUCT}     || 'N/A' ;
            my $hmdb_delta   = $hit->{ENTRY_DELTA}      || 0 ;

            ## METLIN data display model
            ## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=...
            push @cells, $hmdb_delta.'::['.$hmdb_name.']::'.$hmdb_cpd_mz.'::'.$hmdb_formula.'::['.$hmdb_adduct.']::'.$hmdb_id ;
        }

        ## the separator is added by join only : no trailing ' | ' is left when the last
        ## entries of the list are skipped duplicates
        my $cluster_col = scalar(@cells) ? join( ' | ', @cells ) : 'NONE' ;

        push ( @hmdb_matrix, [ $cluster_col ] ) ;
        $index_mz++ ;
    }
    return(\@hmdb_matrix) ;
}
## END of SUB

=head2 METHOD add_lm_matrix_to_input_matrix

	## Description : build a full matrix (input + lm column)
	## Input : $input_matrix_object, $lm_matrix_object, $nb_header
	## Output : $output_matrix_object
	## Usage : my ( $output_matrix_object ) = add_lm_matrix_to_input_matrix( $input_matrix_object, $lm_matrix_object, $nb_header ) ;

=cut
## START of SUB
sub add_lm_matrix_to_input_matrix {
    ## Append the column of $lm_matrix_object to the rows of $input_matrix_object.
    ## Input  : $input_matrix_object - array ref of rows (array refs)
    ##          $lm_matrix_object    - array ref of rows ; the last cell of each row is used
    ##          $nb_header           - number of leading input rows copied without any added cell
    ## Output : array ref of new rows ; the input rows are copied, not modified.
    ## The rows of $lm_matrix_object are consumed in order, starting with the first input
    ## row that follows the $nb_header copied lines. An input row without a matching
    ## lm row is copied unchanged.
    my ( $self, $input_matrix_object, $lm_matrix_object, $nb_header ) = @_ ;

    my @merged_rows ;
    my $lm_cursor = 0 ;
    my $line_nb = 0 ;

    for my $input_row ( @{$input_matrix_object} ) {

        my @merged_row = @{$input_row} ;
        $line_nb++ ;

        my $is_header_line = ( ( defined $nb_header ) and ( $line_nb <= $nb_header ) ) ;

        if ( !$is_header_line ) {
            my $lm_row = $lm_matrix_object->[$lm_cursor] ;

            if ( $lm_row ) {
                my $nb_cols = scalar( @{$lm_row} ) ;
                warn "the add method can't manage more than one column\n" if ( $nb_cols > 1 ) ;

                push @merged_row, $lm_row->[ $nb_cols - 1 ] ;
                $lm_cursor++ ;
            }
        }
        push @merged_rows, \@merged_row ;
    }
    return \@merged_rows ;
}
## END of SUB

=head2 METHOD write_csv_skel

	## Description : prepare and write csv output file
	## Input : $csv_file, $rows
	## Output : $csv_file
	## Usage : my ( $csv_file ) = write_csv_skel( $csv_file, $rows ) ;

=cut
## START of SUB
sub write_csv_skel {
    ## Write $rows in the file whose path is held by the scalar ref $csv_file, through the
    ## lib::csv helper configured with a tabulation as separator.
    ## Input  : $csv_file (scalar ref), $rows
    ## Output : $csv_file (the same scalar ref)
    ## NOTE(review): $rows is presumably an array ref of array refs - confirm against lib::csv.
    my ( $self, $csv_file, $rows ) = @_ ;

#    my $ocsv = lib::csv::new( {is_binary => 1 , quote_binary => 0, quote_char => undef }) ;
    my $csv_helper = lib::csv::new() ;
    my $tab_writer = $csv_helper->get_csv_object("\t") ;
    $csv_helper->write_csv_from_arrays( $tab_writer, ${$csv_file}, $rows ) ;

    return $csv_file ;
}
## END of SUB

=head2 METHOD write_csv_one_mass

	## Description : print a csv file (tabulated, one line per match of each queried mass)
	## Input : $masses, $ids, $results, $file
	## Output : N/A
	## Usage : write_csv_one_mass( $masses, $ids, $results, $file ) ;

=cut
## START of SUB
sub write_csv_one_mass {
    ## Write a tabulated file with one line per match of each queried mass.
    ## Input  : $masses  - array ref of the queried masses
    ##          $ids     - array ref of the ids of the queried masses (same order)
    ##          $results - array ref, one array ref of entry hashes per mass
    ##          $file    - path of the output file (written with the :utf8 layer)
    ## Output : N/A
    ## Columns : ID, Query(Da), Delta(ppm), Metabolite_Name, Cpd_MW(Da), Formula, Adduct, Adduct_MW(Da), HMDB_ID
    ## - the writing stops at the first id whose mass is false (undefined, 0 or '')
    ## - an ENTRY_ENTRY_ID already printed for the mass (or equal to 'N/A') is skipped
    ## - a mass with an empty entry list gets one 'UNKNOWN' line ; a mass whose slot in
    ##   $results is false gets no line
    ## Retrieve Values
    my $self = shift ;
    my ( $masses, $ids, $results, $file,  ) = @_ ;

    ## a three-argument open with an undefined name would open an anonymous temporary file
    die "No output file name available to write the csv file\n" if ( !defined $file ) ;

    ## lexical handle : a CSV handle opened by the caller is not clobbered
    open( my $csv_fh, '>:utf8', $file ) or die "Can't create the file $file: $!\n" ;
    print {$csv_fh} "ID\tQuery(Da)\tDelta(ppm)\tMetabolite_Name\tCpd_MW(Da)\tFormula\tAdduct\tAdduct_MW(Da)\tHMDB_ID\n" ;

    my $i = 0 ;

    foreach my $id (@{$ids}) {

        my $mass = $masses->[$i] ;
        last if ( !$mass ) ;

        if ( $results->[$i] ) { ## a requested id has a result in the list of hashes $results.

            my %seen_id = ( 'N/A' => 1 ) ;
            my $nb_entries = 0 ;

            foreach my $entry (@{$results->[$i]}) {
                $nb_entries++ ;

                ## duplicate filter on the entry id
                my $hmdb_id = $entry->{ENTRY_ENTRY_ID} ;
                my $id_key = defined $hmdb_id ? $hmdb_id : '' ;
                next if ( $seen_id{$id_key}++ ) ;

                ## a false field is replaced by its default value
                my @columns = (
                    $id,
                    $mass,
                    $entry->{ENTRY_DELTA} || '0',
                    $entry->{ENTRY_ENTRY_NAME} ? "[$entry->{ENTRY_ENTRY_NAME}]" : 'UNKNOWN',
                    $entry->{ENTRY_CPD_MZ} || 'N/A',
                    $entry->{ENTRY_FORMULA} || 'N/A',
                    $entry->{ENTRY_ADDUCT} ? "[$entry->{ENTRY_ADDUCT}]" : 'N/A',
                    $entry->{ENTRY_ADDUCT_MZ} || 'N/A',
                    $hmdb_id || 'N/A',
                ) ;
                print {$csv_fh} join( "\t", @columns )."\n" ;
            } ## end foreach

            if ( $nb_entries == 0 ) {
                print {$csv_fh} "$id\t$mass\t0\tUNKNOWN\tN/A\tN/A\tN/A\tN/A\tN/A\n" ;
            }
        }
        $i++ ;
    }
    ## buffered write errors only show up when the handle is closed
    close($csv_fh) or die "Can't close the file $file: $!\n" ;
    return ;
}
## END of SUB

1 ;


__END__

=head1 SUPPORT

You can find documentation for this module with the perldoc command.

 perldoc hmdb.pm

=head1 Exports

=over 4

=item :ALL is ...

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 06 / 06 / 2013

version 2 : 27 / 01 / 2014

version 3 : 19 / 11 / 2014

version 4 : 28 / 01 / 2016

version 5 : 02 / 11 /2016

=cut
author	fgiacomoni
date	Thu, 19 May 2022 13:43:09 +0000
parents	453fbe98925a
children	d8e2ede293a6