comparison lib/hmdb.pm @ 23:2d8a310e86ce draft

Prod branch Updating with v.:CI_COMMIT_TAG- - Fxx
author fgiacomoni
date Thu, 19 May 2022 13:43:09 +0000
parents 453fbe98925a
children d8e2ede293a6
comparison
equal deleted inserted replaced
22:453fbe98925a 23:2d8a310e86ce
19 19
20 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); 20 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
21 21
22 our $VERSION = "1.0"; 22 our $VERSION = "1.0";
23 our @ISA = qw(Exporter); 23 our @ISA = qw(Exporter);
24 our @EXPORT = qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass ); 24 our @EXPORT = qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass );
25 our %EXPORT_TAGS = ( ALL => [qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] ); 25 our %EXPORT_TAGS = ( ALL => [qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] );
26 26
27 =head1 NAME 27 =head1 NAME
28 28
29 My::Module - An example module 29 My::Module - An example module
30 30
271 ### END of SUB 271 ### END of SUB
272 272
273 273
274 =head2 METHOD testMatchesFromHmdbWithUA 274 =head2 METHOD testMatchesFromHmdbWithUA
275 275
276 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. 276 ## Description : [DEPRECATED] test a single query with tests parameters on hmdb - get the status of the complete server infra.
277 ## Input : none 277 ## Input : none
278 ## Output : $status_line 278 ## Output : $status_line
279 ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ; 279 ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ;
280 280
281 =cut 281 =cut
286 286
287 my @page = () ; 287 my @page = () ;
288 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and 288 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
289 289
290 my $mech = WWW::Mechanize->new( 290 my $mech = WWW::Mechanize->new(
291 agent => 'wonderbot for W4M 1.01', 291 # agent => 'wonderbot for W4M 1.01',
292 autocheck => 1, 292 agent => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:93.0) Gecko/20100101 Firefox/93.0' ,
293 autocheck => 0,
293 ); 294 );
294 295
295 my $statusGetLine = 0 ; 296 my $statusGetLine = 0 ;
296 my $statusPostLine = 0 ; 297 my $statusPostLine = 0 ;
298 my $csrftoken = undef ;
297 299
298 #receiving cookies and authentication token (CFRS) 300 #receiving cookies and authentication token (CFRS)
299 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); 301 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search");
302 $statusGetLine = $mech->status() ;
303
304 if ($statusGetLine == 200 ) {
305 die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
306 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
307 $csrftoken = $1;
308 print "\nTOKEN: $csrftoken\n" ;
309 $mech->add_header("X-CSRFToken", $csrftoken);
310 $mech->add_header('Host', 'specdb.wishartlab.com');
311 $mech->add_header('Connection', 'keep-alive');
312 $mech->add_header('Upgrade-Insecure-Requests', '1');
313 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
314 $mech->add_header('Accept-Language', 'en-US,en;q=0.5');
315 $mech->add_header('Accept-Encoding', 'gzip, deflate');
316 # $mech->add_header('Content-Length', "300");
317 $mech->add_header('Origin', 'null');
318 $mech->add_header('DNT', '1');
319 $mech->add_header('Referer', 'https://hmdb.ca/spectra/ms/search');
320 # $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
321 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8');
322
323
324 }
325
326 ## POST test
327 my $top = 1 ;
328 while ($statusPostLine != 200 ) {
329
330 # Fix a limit at 3 tries...
331 if ($top < 4) {
332 print "\tTesting HMDB server connexion ($top time(s) )...\n" ;
333 eval {
334 $mech->post(
335 "http://specdb.wishartlab.com/ms/search.csv",
336 Content => 'utf8=TRUE&authenticity_token='.$csrftoken.'&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV'
337 );
338 } ;
339 # print Dumper $mech ;
340 $statusPostLine = $mech->status() ;
341 print "Status: $statusPostLine" ;
342 }
343 else {
344 last ;
345 }
346 $top++ ;
347 }## End While
348 return (\$statusPostLine) ;
349 }
350 ## END of SUB
351
352
353 =head2 METHOD testMatchesFromHmdb5WithUA
354
355 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra (API V5.0 compliant).
356 ## Input : none
357 ## Output : $status_line
358 ## Usage : my ( $status_line ) = testMatchesFromHmdb5WithUA( ) ;
359
360 =cut
361 ## START of SUB
362 sub testMatchesFromHmdb5WithUA {
363 ## Retrieve Values
364 my $self = shift ;
365
366 my $mech = WWW::Mechanize->new(
367 agent => 'wonderbot for W4M 3.0',
368 autocheck => 1,
369 timeout => 2400,
370 );
371
372 my $statusGetLine = 0 ;
373 my $statusPostLine = 0 ;
374
375 #receiving cookies and authentication token (CFRS)
376 my $reqInit = $mech->get("https://www.hmdb.ca/spectra/ms/search");
300 $statusGetLine = $mech->status() ; 377 $statusGetLine = $mech->status() ;
301 378
302 if ($statusGetLine == 200 ) { 379 if ($statusGetLine == 200 ) {
303 die 'no CSRF_REQUEST_TOKEN_VALUE in page found' 380 die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
304 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; 381 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
305 my $csrftoken = $1; 382 my $csrftoken = $1;
306 # print "\nTOKEN: $csrftoken\n" ; 383 # print "\nTOKEN: $csrftoken\n" ;
307 $mech->add_header("X-CSRFToken", $csrftoken); 384 $mech->add_header("X-CSRFToken", $csrftoken);
308 $mech->add_header('Connection', 'keep-alive'); 385 $mech->add_header('Connection', 'keep-alive');
309 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); 386 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
310 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); 387 $mech->add_header('Referer', 'https://www.hmdb.ca/spectra/ms/search');
311 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); 388 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
312 } 389 }
390
313 391
314 ## POST test 392 ## POST test
315 my $top = 1 ; 393 my $top = 1 ;
316 while ($statusPostLine != 200 ) { 394 while ($statusPostLine != 200 ) {
317 395
318 # Fix a limit at 3 tries... 396 # Fix a limit at 3 tries...
319 if ($top < 4) { 397 if ($top < 4) {
320 print "\tTesting HMDB server connexion ($top time(s) )...\n" ; 398 print "\tTesting HMDB server connexion ($top time(s) )...\n" ;
321 $mech->post( 399 eval {
322 "http://specdb.wishartlab.com/ms/search.csv", 400 my $res = $mech->get(
323 Content => 'utf8=TRUE&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV' 401 'https://hmdb.ca/spectra/ms/generate_csv.csv?'
324 ); 402 .'results%5Baction%5D=search'
325 403 .'&results%5Badduct_type%5D%5B%5D=M%2BH%202M%2BH'
404 #.'&results%5Bauthenticity_token%5D='
405 .'&results%5Bccs_predictors%5D='
406 .'&results%5Bccs_tolerance%5D='
407 .'&results%5Bcommit%5D=Search'
408 .'&results%5Bcontroller%5D=specdb%2Fms'
409 .'&results%5Bms_search_ion_mode%5D=positive'
410 .'&results%5Bquery_masses%5D=125.0089'
411 .'&results%5Btolerance%5D=0.001'
412 .'&results%5Btolerance_units%5D=Da'
413 .'&results%5Butf8%5D=%E2%9C%93'
414
415 );
416 } ;
326 # print Dumper $mech ; 417 # print Dumper $mech ;
327 $statusPostLine = $mech->status() ; 418 $statusPostLine = $mech->status() ;
419 print "Status: $statusPostLine" ;
328 } 420 }
329 else { 421 else {
330 last ; 422 last ;
331 } 423 }
332 $top++ ; 424 $top++ ;
333 }## End While 425 }## End While
334 return (\$statusPostLine) ; 426 return (\$statusPostLine) ;
335 } 427 }
336 ## END of SUB 428 ## END of SUB
337 429
338
339
340
341 =head2 METHOD check_state_from_hmdb_ua 430 =head2 METHOD check_state_from_hmdb_ua
342 431
343 ## Description : check the thhp status of hmdb and kill correctly the script if necessary. 432 ## Description : check the thhp status of hmdb and kill correctly the script if necessary.
344 ## Input : $status 433 ## Input : $status
345 ## Output : none 434 ## Output : none
351 ## Retrieve Values 440 ## Retrieve Values
352 my $self = shift ; 441 my $self = shift ;
353 my ($status) = @_ ; 442 my ($status) = @_ ;
354 443
355 if (!defined $$status) { 444 if (!defined $$status) {
356 croak "No http status is defined for the distant server" ; 445 croak "No https status is defined for the distant server" ;
357 } 446 }
358 else { 447 else {
359 unless ( $$status == 200 ) { 448 unless ( $$status == 200 ) {
360 if ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; } 449 if ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; }
361 if ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; } 450 if ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; }
387 sub get_matches_from_hmdb_ua { 476 sub get_matches_from_hmdb_ua {
388 ## Retrieve Values 477 ## Retrieve Values
389 my $self = shift ; 478 my $self = shift ;
390 my ( $masses, $delta, $mode ) = @_ ; 479 my ( $masses, $delta, $mode ) = @_ ;
391 480
481 ## Added May, 2022
482 warn "[DEPRECATED Methode] method get_matches_from_hmdb_ua is deprecated and not compatible with HMDB 4.0" ;
483 return ([], 500) ;
484
392 my @page = () ; 485 my @page = () ;
393 486
394 my $ua = LWP::UserAgent->new( keep_alive => 10 ); 487 my $ua = LWP::UserAgent->new( keep_alive => 10 );
395 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); 488 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0");
396 $ua->timeout(2400) ; 489 $ua->timeout(2400) ;
441 sub getMatchesFromHmdbWithUA { 534 sub getMatchesFromHmdbWithUA {
442 ## Retrieve Values 535 ## Retrieve Values
443 my $self = shift ; 536 my $self = shift ;
444 my ( $masses, $delta, $mode, $adducts ) = @_ ; 537 my ( $masses, $delta, $mode, $adducts ) = @_ ;
445 538
539 ## Added May, 2022
540 warn "[DEPRECATED Methode] method getMatchesFromHmdbWithUA is deprecated and not compatible with HMDB 5.0" ;
541 return ([], 500) ;
542
446 my @page = () ; 543 my @page = () ;
447 544
448 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and 545 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
449 546
450 my $mech = WWW::Mechanize->new( 547 my $mech = WWW::Mechanize->new(
499 596
500 return (\@page, $statusPostLine) ; 597 return (\@page, $statusPostLine) ;
501 } 598 }
502 ## END of SUB 599 ## END of SUB
503 600
601 =head2 METHOD getMatchesFromHmdb5WithUA
602
603 ## Description : HMDB v5.0 querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
604 ## Input : $mass, $delta, $mode, adducts
605 ## Output : $results
606 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ;
607
608 =cut
609 ## START of SUB
610 sub getMatchesFromHmdb5WithUA {
611 ## Retrieve Values
612 my $self = shift ;
613 my ( $masses, $delta, $mode, $adducts ) = @_ ;
614
615 my @page = () ;
616
617 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
618
619 my $mech = WWW::Mechanize->new(
620 agent => 'wonderbot for W4M 3.0',
621 autocheck => 1,
622 timeout => 2400,
623 );
624
625 my $statusGetLine = 0 ;
626 my $statusPostLine = 0 ;
627
628 #receiving cookies and authentication token (CFRS)
629 my $reqInit = $mech->get("https://www.hmdb.ca/spectra/ms/search");
630 $statusGetLine = $mech->status() ;
631
632 if ($statusGetLine == 200 ) {
633 die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
634 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
635 my $csrftoken = $1;
636 # print "\nTOKEN: $csrftoken\n" ;
637 $mech->add_header("X-CSRFToken", $csrftoken);
638 $mech->add_header('Connection', 'keep-alive');
639 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
640 $mech->add_header('Referer', 'https://www.hmdb.ca/spectra/ms/search');
641 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
642 }
643 ## adduct format is adduct_type=M%2BH%202M%2BH
644
645 if ( (!defined $adducts) or ( $adducts eq '') ) {
646 $adducts = 'Unknown' ;
647 }
648
649 my $res = $mech->get(
650 'https://hmdb.ca/spectra/ms/generate_csv.csv?'
651 .'results%5Baction%5D=search'
652 .'&results%5Badduct_type%5D%5B%5D='.$adducts
653 #.'&results%5Bauthenticity_token%5D='
654 .'&results%5Bccs_predictors%5D='
655 .'&results%5Bccs_tolerance%5D='
656 .'&results%5Bcommit%5D=Search'
657 .'&results%5Bcontroller%5D=specdb%2Fms'
658 .'&results%5Bms_search_ion_mode%5D='.$mode
659 .'&results%5Bquery_masses%5D='.$masses
660 .'&results%5Btolerance%5D='.$delta
661 .'&results%5Btolerance_units%5D=Da'
662 .'&results%5Butf8%5D=%E2%9C%93'
663
664 );
665 $statusGetLine = $mech->status() ;
666
667 if ($mech->success) {
668 @page = split ( /\n/, $res->decoded_content ) ;
669 $statusPostLine = 'OK' ;
670 }
671 else {
672 $statusPostLine = $mech->status() ;
673 warn "\t[HMDB service issue !! the server returned a $statusPostLine HTTP error]" ;
674 }
675
676 # print Dumper $res->decoded_content ;
677
678 return (\@page, $statusPostLine) ;
679 }
680 ## END of SUB
681
682
683 =head2 METHOD parseHmdb5CSVResults
684
685 ## Description : parse the csv results and get data - API 5.0 compliant
686 ## Input : $csv
687 ## Output : $results
688 ## Usage : my ( $results ) = parseHmdb5CSVResults( $csv ) ;
689
690 =cut
691 ## START of SUB
692 sub parseHmdb5CSVResults {
693 ## Retrieve Values
694 my $self = shift ;
695 my ( $csv, $masses, $max_query ) = @_ ;
696
697 my $test = 0 ;
698 my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ;
699
700 my %result_by_entry = () ;
701 my %features = () ;
702
703 # print Dumper $csv ;
704 # print Dumper $masses ;
705 # print Dumper $max_query ;
706
707 foreach my $line (@{$csv}) {
708 ## NEW HMDB format V5.0 - May2022
709 if ($line !~ /query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,"delta\(ppm\),",ccs_value/) {
710 #query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,"delta(ppm),",ccs_value',
711
712 if ( $line =~ /(\d+\.\d+),(\w+),(.*),(\w+),(\d+\.\d+),([\w|n\/a|\s+]+)\s*,(\+|\-),(\d+\.\d+),(\d+),(\d*)/ ) {
713 print "$line\n" ;
714 #if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a|\s+]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) {
715 my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ;
716
717 if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
718
719 $features{ENTRY_ENTRY_ID} = $entry[1] ;
720 $features{ENTRY_ENTRY_NAME} = $entry[2] ;
721 $features{ENTRY_FORMULA} = $entry[3] ;
722 $features{ENTRY_CPD_MZ} = $entry[4] ;
723 $features{ENTRY_ADDUCT} = $entry[5] ;
724 $features{ENTRY_ADDUCT_TYPE} = $entry[6] ;
725 $features{ENTRY_ADDUCT_MZ} = $entry[7] ;
726 $features{ENTRY_DELTA} = $entry[8] ;
727
728 my %temp = %features ;
729 push (@{$result_by_entry{$entry[0]} }, \%temp) ;
730 }
731 # elsif ($line =~ /(\d+\.\d+)/) { #
732 # ## 288.082286511284,HMDB0002255,R-Methylmalonyl-CoA, C01213 ,C25H40N7O19P3S,867.131252359,M-3H,-,288.036475,159
733 # ## 283.108004472276,"Bicyclo_3,1,1heptane-2,3-diol,2,6,6_trimethyl","2,3-Pinanediol",n/a,C10H18O2,170.13067982,M+TFA-H,-,283.116266,29
734 # ## 174.034120330029,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M-H20-H,-,174.055503,123
735 # ## 193.139160745841,HMDB0012109,"7-[(1R,2R,3R,5S)-3,5-Dihydroxy-2-[(1E,3S)-3-hydroxyoct-1-en-1-yl]cyclopentyl]-5,6-dihydroxyheptanoic acid", C06475,C20H36O7,388.246103506,M-2H,-,193.115776,121
736 # ## 214.018826827064,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M+Na-2H,-,214.048559,139
737 # }
738 # else {
739 #
740 # warn "The parsed line ($line) does not match your pattern\n " ;
741 # }
742 }
743 else {
744 print "Header detected...Parsing is starting...\n" ;
745 next ;
746 }
747 } ## end foreach
748
749 ## manage per query_mzs (keep query masses order by array)
750 my @results = () ;
751 foreach (@{$masses}) {
752 if ($result_by_entry{$_}) {
753
754 ## cut all entries > $max_query - all entries were already sorted...by hmdb
755 my @temp_entries = @{$result_by_entry{$_}} ;
756 my @temp_cut = () ;
757 my $current_query = 0 ;
758 foreach (@temp_entries) {
759 $current_query ++ ;
760 if ($current_query > $max_query) {
761 last ;
762 }
763 else {
764 push (@temp_cut, $_) ;
765 }
766 }
767 push (@results, \@temp_cut) ;
768 }
769 else { push (@results, [] ) ; } ;
770
771 }
772 return(\@results) ;
773 }
774 ## END of SUB
775
504 776
505 =head2 METHOD parse_hmdb_csv_results 777 =head2 METHOD parse_hmdb_csv_results
506 778
507 ## Description : parse the csv results and get data 779 ## Description : [DEPRECATED] parse the csv results and get data
508 ## Input : $csv 780 ## Input : $csv
509 ## Output : $results 781 ## Output : $results
510 ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ; 782 ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;
511 783
512 =cut 784 =cut