Mercurial > repos > fgiacomoni > hmdb_ms_search
changeset 23:2d8a310e86ce draft
Prod branch Updating with v.:CI_COMMIT_TAG- - Fxx
author | fgiacomoni |
---|---|
date | Thu, 19 May 2022 13:43:09 +0000 |
parents | 453fbe98925a |
children | d8e2ede293a6 |
files | Dockerfile README.md README.txt conf_hmdb.cfg hmdb.tmpl lib/conf.pm lib/csv.pm lib/hmdb.pm static/images/hmdb.png t/hmdb_managerTest.pl t/lib/hmdbTest.pm test-data/input_test01_fake-mzrt-input-with-id.tabular test-data/input_test02_mzrt-input-with-id.tabular test-data/out_test01.html test-data/out_test01.tabular test-data/out_test01.txt test-data/out_test02.html test-data/out_test02.tabular test-data/out_test02.txt test-data/out_test03.html test-data/out_test03.tabular test-data/out_test03.txt test-data/out_test04.html test-data/out_test04.tabular test-data/out_test04.txt wsdl_hmdb.pl wsdl_hmdb.xml |
diffstat | 5 files changed, 479 insertions(+), 207 deletions(-) [+] |
line wrap: on
line diff
--- a/lib/hmdb.pm Fri Nov 20 17:29:18 2020 +0000 +++ b/lib/hmdb.pm Thu May 19 13:43:09 2022 +0000 @@ -21,8 +21,8 @@ our $VERSION = "1.0"; our @ISA = qw(Exporter); -our @EXPORT = qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass ); -our %EXPORT_TAGS = ( ALL => [qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] ); +our @EXPORT = qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass ); +our %EXPORT_TAGS = ( ALL => [qw( parseHmdb5CSVResults getMatchesFromHmdb5WithUA map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix 
write_csv_skel write_csv_one_mass )] ); =head1 NAME @@ -273,7 +273,7 @@ =head2 METHOD testMatchesFromHmdbWithUA - ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. + ## Description : [DEPRECATED] test a single query with tests parameters on hmdb - get the status of the complete server infra. ## Input : none ## Output : $status_line ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ; @@ -288,12 +288,14 @@ #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and my $mech = WWW::Mechanize->new( - agent => 'wonderbot for W4M 1.01', - autocheck => 1, +# agent => 'wonderbot for W4M 1.01', + agent => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:93.0) Gecko/20100101 Firefox/93.0' , + autocheck => 0, ); my $statusGetLine = 0 ; my $statusPostLine = 0 ; + my $csrftoken = undef ; #receiving cookies and authentication token (CFRS) my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); @@ -302,13 +304,23 @@ if ($statusGetLine == 200 ) { die 'no CSRF_REQUEST_TOKEN_VALUE in page found' unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; - my $csrftoken = $1; -# print "\nTOKEN: $csrftoken\n" ; + $csrftoken = $1; + print "\nTOKEN: $csrftoken\n" ; $mech->add_header("X-CSRFToken", $csrftoken); + $mech->add_header('Host', 'specdb.wishartlab.com'); $mech->add_header('Connection', 'keep-alive'); + $mech->add_header('Upgrade-Insecure-Requests', '1'); $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); - $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); - $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + $mech->add_header('Accept-Language', 'en-US,en;q=0.5'); + $mech->add_header('Accept-Encoding', 'gzip, deflate'); +# $mech->add_header('Content-Length', "300"); + $mech->add_header('Origin', 'null'); + $mech->add_header('DNT', '1'); + $mech->add_header('Referer', 
'https://hmdb.ca/spectra/ms/search'); +# $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'); + + } ## POST test @@ -318,13 +330,15 @@ # Fix a limit at 3 tries... if ($top < 4) { print "\tTesting HMDB server connexion ($top time(s) )...\n" ; - $mech->post( - "http://specdb.wishartlab.com/ms/search.csv", - Content => 'utf8=TRUE&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV' - ); - + eval { + $mech->post( + "http://specdb.wishartlab.com/ms/search.csv", + Content => 'utf8=TRUE&authenticity_token='.$csrftoken.'&mode=positive&adduct_type=M%2BH%202M%2BH&query_masses=125.0089&tolerance=0.001&database=HMDB&commit=Download Results As CSV' + ); + } ; # print Dumper $mech ; $statusPostLine = $mech->status() ; + print "Status: $statusPostLine" ; } else { last ; @@ -336,7 +350,82 @@ ## END of SUB +=head2 METHOD testMatchesFromHmdb5WithUA + ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra (API V5.0 compliant). 
+ ## Input : none + ## Output : $status_line + ## Usage : my ( $status_line ) = testMatchesFromHmdb5WithUA( ) ; + +=cut +## START of SUB +sub testMatchesFromHmdb5WithUA { + ## Retrieve Values + my $self = shift ; + +my $mech = WWW::Mechanize->new( + agent => 'wonderbot for W4M 3.0', + autocheck => 1, + timeout => 2400, + ); + + my $statusGetLine = 0 ; + my $statusPostLine = 0 ; + + #receiving cookies and authentication token (CFRS) + my $reqInit = $mech->get("https://www.hmdb.ca/spectra/ms/search"); + $statusGetLine = $mech->status() ; + + if ($statusGetLine == 200 ) { + die 'no CSRF_REQUEST_TOKEN_VALUE in page found' + unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; + my $csrftoken = $1; +# print "\nTOKEN: $csrftoken\n" ; + $mech->add_header("X-CSRFToken", $csrftoken); + $mech->add_header('Connection', 'keep-alive'); + $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); + $mech->add_header('Referer', 'https://www.hmdb.ca/spectra/ms/search'); + $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + } + + + ## POST test + my $top = 1 ; + while ($statusPostLine != 200 ) { + + # Fix a limit at 3 tries... + if ($top < 4) { + print "\tTesting HMDB server connexion ($top time(s) )...\n" ; + eval { + my $res = $mech->get( + 'https://hmdb.ca/spectra/ms/generate_csv.csv?' 
+ .'results%5Baction%5D=search' + .'&results%5Badduct_type%5D%5B%5D=M%2BH%202M%2BH' + #.'&results%5Bauthenticity_token%5D=' + .'&results%5Bccs_predictors%5D=' + .'&results%5Bccs_tolerance%5D=' + .'&results%5Bcommit%5D=Search' + .'&results%5Bcontroller%5D=specdb%2Fms' + .'&results%5Bms_search_ion_mode%5D=positive' + .'&results%5Bquery_masses%5D=125.0089' + .'&results%5Btolerance%5D=0.001' + .'&results%5Btolerance_units%5D=Da' + .'&results%5Butf8%5D=%E2%9C%93' + + ); + } ; +# print Dumper $mech ; + $statusPostLine = $mech->status() ; + print "Status: $statusPostLine" ; + } + else { + last ; + } + $top++ ; + }## End While + return (\$statusPostLine) ; +} +## END of SUB =head2 METHOD check_state_from_hmdb_ua @@ -353,7 +442,7 @@ my ($status) = @_ ; if (!defined $$status) { - croak "No http status is defined for the distant server" ; + croak "No https status is defined for the distant server" ; } else { unless ( $$status == 200 ) { @@ -389,6 +478,10 @@ my $self = shift ; my ( $masses, $delta, $mode ) = @_ ; + ## Added May, 2022 + warn "[DEPRECATED Methode] method get_matches_from_hmdb_ua is deprecated and not compatible with HMDB 4.0" ; + return ([], 500) ; + my @page = () ; my $ua = LWP::UserAgent->new( keep_alive => 10 ); @@ -443,6 +536,10 @@ my $self = shift ; my ( $masses, $delta, $mode, $adducts ) = @_ ; + ## Added May, 2022 + warn "[DEPRECATED Methode] method getMatchesFromHmdbWithUA is deprecated and not compatible with HMDB 5.0" ; + return ([], 500) ; + my @page = () ; #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and @@ -501,10 +598,185 @@ } ## END of SUB +=head2 METHOD getMatchesFromHmdb5WithUA + + ## Description : HMDB v5.0 querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) + ## Input : $mass, $delta, $mode, adducts + ## Output : $results + ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; + +=cut +## START of SUB +sub getMatchesFromHmdb5WithUA { + ## 
Retrieve Values + my $self = shift ; + my ( $masses, $delta, $mode, $adducts ) = @_ ; + + my @page = () ; + + #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and + + my $mech = WWW::Mechanize->new( + agent => 'wonderbot for W4M 3.0', + autocheck => 1, + timeout => 2400, + ); + + my $statusGetLine = 0 ; + my $statusPostLine = 0 ; + + #receiving cookies and authentication token (CFRS) + my $reqInit = $mech->get("https://www.hmdb.ca/spectra/ms/search"); + $statusGetLine = $mech->status() ; + + if ($statusGetLine == 200 ) { + die 'no CSRF_REQUEST_TOKEN_VALUE in page found' + unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; + my $csrftoken = $1; +# print "\nTOKEN: $csrftoken\n" ; + $mech->add_header("X-CSRFToken", $csrftoken); + $mech->add_header('Connection', 'keep-alive'); + $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); + $mech->add_header('Referer', 'https://www.hmdb.ca/spectra/ms/search'); + $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); + } + ## adduct format is adduct_type=M%2BH%202M%2BH + + if ( (!defined $adducts) or ( $adducts eq '') ) { + $adducts = 'Unknown' ; + } + + my $res = $mech->get( + 'https://hmdb.ca/spectra/ms/generate_csv.csv?' + .'results%5Baction%5D=search' + .'&results%5Badduct_type%5D%5B%5D='.$adducts + #.'&results%5Bauthenticity_token%5D=' + .'&results%5Bccs_predictors%5D=' + .'&results%5Bccs_tolerance%5D=' + .'&results%5Bcommit%5D=Search' + .'&results%5Bcontroller%5D=specdb%2Fms' + .'&results%5Bms_search_ion_mode%5D='.$mode + .'&results%5Bquery_masses%5D='.$masses + .'&results%5Btolerance%5D='.$delta + .'&results%5Btolerance_units%5D=Da' + .'&results%5Butf8%5D=%E2%9C%93' + + ); + $statusGetLine = $mech->status() ; + + if ($mech->success) { + @page = split ( /\n/, $res->decoded_content ) ; + $statusPostLine = 'OK' ; + } + else { + $statusPostLine = $mech->status() ; + warn "\t[HMDB service issue !! 
the server returned a $statusPostLine HTTP error]" ; + } + +# print Dumper $res->decoded_content ; + + return (\@page, $statusPostLine) ; +} +## END of SUB + + +=head2 METHOD parseHmdb5CSVResults + + ## Description : parse the csv results and get data - API 5.0 compliant + ## Input : $csv + ## Output : $results + ## Usage : my ( $results ) = parseHmdb5CSVResults( $csv ) ; + +=cut +## START of SUB +sub parseHmdb5CSVResults { + ## Retrieve Values + my $self = shift ; + my ( $csv, $masses, $max_query ) = @_ ; + + my $test = 0 ; + my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ; + + my %result_by_entry = () ; + my %features = () ; + +# print Dumper $csv ; +# print Dumper $masses ; +# print Dumper $max_query ; + + foreach my $line (@{$csv}) { + ## NEW HMDB format V5.0 - May2022 + if ($line !~ /query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m\/z,"delta\(ppm\),",ccs_value/) { + #query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,"delta(ppm),",ccs_value', + + if ( $line =~ /(\d+\.\d+),(\w+),(.*),(\w+),(\d+\.\d+),([\w|n\/a|\s+]+)\s*,(\+|\-),(\d+\.\d+),(\d+),(\d*)/ ) { + print "$line\n" ; + #if ( $line =~ /(\d+\.\d+),(\w+),(.*),([\w|n\/a|\s+]+)\s*,(\w+),(\d+\.\d+),(.*),(\+|\-),(\d+\.\d+),(\d+)/ ) { + my @entry = ("$1","$2","$3","$4","$5","$6","$7","$8","$9","$10") ; + + if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; } + + $features{ENTRY_ENTRY_ID} = $entry[1] ; + $features{ENTRY_ENTRY_NAME} = $entry[2] ; + $features{ENTRY_FORMULA} = $entry[3] ; + $features{ENTRY_CPD_MZ} = $entry[4] ; + $features{ENTRY_ADDUCT} = $entry[5] ; + $features{ENTRY_ADDUCT_TYPE} = $entry[6] ; + $features{ENTRY_ADDUCT_MZ} = $entry[7] ; + $features{ENTRY_DELTA} = $entry[8] ; + + my %temp = %features ; + push (@{$result_by_entry{$entry[0]} }, \%temp) ; + } +# elsif ($line =~ 
/(\d+\.\d+)/) { # +# ## 288.082286511284,HMDB0002255,R-Methylmalonyl-CoA, C01213 ,C25H40N7O19P3S,867.131252359,M-3H,-,288.036475,159 +# ## 283.108004472276,"Bicyclo_3,1,1heptane-2,3-diol,2,6,6_trimethyl","2,3-Pinanediol",n/a,C10H18O2,170.13067982,M+TFA-H,-,283.116266,29 +# ## 174.034120330029,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M-H20-H,-,174.055503,123 +# ## 193.139160745841,HMDB0012109,"7-[(1R,2R,3R,5S)-3,5-Dihydroxy-2-[(1E,3S)-3-hydroxyoct-1-en-1-yl]cyclopentyl]-5,6-dihydroxyheptanoic acid", C06475,C20H36O7,388.246103506,M-2H,-,193.115776,121 +# ## 214.018826827064,HMDB0011723,2-Methylhippuric acid, C01586,C10H11NO3,193.073893223,M+Na-2H,-,214.048559,139 +# } +# else { +# +# warn "The parsed line ($line) does not match your pattern\n " ; +# } + } + else { + print "Header detected...Parsing is starting...\n" ; + next ; + } + } ## end foreach + + ## manage per query_mzs (keep query masses order by array) + my @results = () ; + foreach (@{$masses}) { + if ($result_by_entry{$_}) { + + ## cut all entries > $max_query - all entries were already sorted...by hmdb + my @temp_entries = @{$result_by_entry{$_}} ; + my @temp_cut = () ; + my $current_query = 0 ; + foreach (@temp_entries) { + $current_query ++ ; + if ($current_query > $max_query) { + last ; + } + else { + push (@temp_cut, $_) ; + } + } + push (@results, \@temp_cut) ; + } + else { push (@results, [] ) ; } ; + + } + return(\@results) ; +} +## END of SUB + =head2 METHOD parse_hmdb_csv_results - ## Description : parse the csv results and get data + ## Description : [DEPRECATED] parse the csv results and get data ## Input : $csv ## Output : $results ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;
--- a/t/hmdb_managerTest.pl Fri Nov 20 17:29:18 2020 +0000 +++ b/t/hmdb_managerTest.pl Thu May 19 13:43:09 2022 +0000 @@ -43,68 +43,36 @@ ['175.01', '238.19', '420.16', '780.32', '956.25', '1100.45' ] ), '175.01%0D%0A238.19%0D%0A420.16%0D%0A780.32%0D%0A956.25%0D%0A1100.45%0D%0A', 'Method \'prepare_multi_masses_query\' works with a list of and return a well formated string for hmdb querying'); - + print "\n** Test $current_test get_matches_from_hmdb_ua with a well-formated string of mzs **\n" ; $current_test++; - is_deeply( get_matches_from_hmdb_uaTest( + is_deeply( getMatchesFromHmdb5WithUATest( '175.01%0D%0A420.16%0D%0A780.32%0D%0A956.25%0D%0A1100.45%0D%0A', 0.001, 'positive'), [ - 'query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)', -# '175.01,HMDB60293,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,0.000125', -# '175.01,HMDB03745,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,0.000676', -# '175.01,HMDB31436,H4O4Si,95.987885149,M+DMSO+H,+,175.009105,0.000895', -# '175.01,HMDB33657,C17H10O6,310.047738052,M+H+K,+,175.009086,0.000914', -# '175.01,HMDB35230,C17H10O6,310.047738052,M+H+K,+,175.009086,0.000914', -# '420.16,HMDB60838,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,0.000683', -# '420.16,HMDB60836,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,0.000683' - ## Update format - 2018-01-19... 
HMDB V04 -# '175.01,HMDB0060293,Hydroxidodioxidosulfidosulfate,C05529,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1', -# '175.01,HMDB0003745,Mesna,C03576,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,4', -# '175.01,HMDB0031436,Silicic acid,n/a,H4O4Si,95.987885149,M+DMSO+H,+,175.009105,5', -# '175.01,HMDB0033657,De-o-methylsterigmatocystin,C03683,C17H10O6,310.047738052,M+H+K,+,175.009086,5', -# '175.01,HMDB0035230,"6,7-Dihydroxy-3-(4-hydroxyphenyl)furo[3,2-b]chromen-2-one",n/a,C17H10O6,310.047738052,M+H+K,+,175.009086,5', -# '420.16,HMDB0060838,"4-hydroxy-5-[(4-{2-[(5-hydroxy-1,2-dihydropyridin-2-ylidene)amino]ethoxy}phenyl)methyl]-2,5-dihydro-1,3-thiazol-2-one",n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', -# '420.16,HMDB0060836,N-Desmethyl O-hydroxyrosiglitazone,n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', - ## Update results - 2018-09-20... HMDBv4 - #'query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)', - '175.01,HMDB0149376,"2-{[5,6-dihydroxy-3-(4-hydroxyphenyl)-4-oxo-3,4-dihydro-2H-1-benzopyran-7-yl]oxy}-3-(sulfooxy)butanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - '175.01,HMDB0149375,"2-({5,6-dihydroxy-4-oxo-3-[4-(sulfooxy)phenyl]-3,4-dihydro-2H-1-benzopyran-7-yl}oxy)-3-hydroxybutanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - '175.01,HMDB0149419,"2-{[5,8-dihydroxy-3-(4-hydroxyphenyl)-4-oxo-3,4-dihydro-2H-1-benzopyran-7-yl]oxy}-3-(sulfooxy)butanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - '175.01,HMDB0149418,"2-({5,8-dihydroxy-4-oxo-3-[4-(sulfooxy)phenyl]-3,4-dihydro-2H-1-benzopyran-7-yl}oxy)-3-hydroxybutanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - '175.01,HMDB0149392,"2-{[3-(3,4-dihydroxyphenyl)-5-hydroxy-4-oxo-3,4-dihydro-2H-1-benzopyran-7-yl]oxy}-3-(sulfooxy)butanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - 
'175.01,HMDB0149391,"2-hydroxy-3-({5-hydroxy-3-[4-hydroxy-3-(sulfooxy)phenyl]-4-oxo-3,4-dihydro-2H-1-benzopyran-7-yl}oxy)butanedioic acid",n/a,C19H16O14S,500.026076369,M+2H+Na,+,175.009949,0', - '175.01,HMDB0000257,Thiosulfate,C05529,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1', - '175.01,HMDB0060293,Thiosulfate,C05529,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1', - '175.01,HMDB0141391,{4-[(1E)-3-oxo-3-phenylprop-1-en-1-yl]phenyl}oxidanesulfonic acid,n/a,C15H12O5S,304.04054466,M+2Na,+,175.00949,3', - '175.01,HMDB0141389,{3-[(1E)-3-oxo-3-phenylprop-1-en-1-yl]phenyl}oxidanesulfonic acid,n/a,C15H12O5S,304.04054466,M+2Na,+,175.00949,3', - '175.01,HMDB0135622,{4-[(2E)-3-phenylprop-2-enoyl]phenyl}oxidanesulfonic acid,n/a,C15H12O5S,304.04054466,M+2Na,+,175.00949,3', - '175.01,HMDB0135448,{3-[(2E)-3-phenylprop-2-enoyl]phenyl}oxidanesulfonic acid,n/a,C15H12O5S,304.04054466,M+2Na,+,175.00949,3', - '175.01,HMDB0240280,Methanesulfonic acid,C11145,CH4O3S,95.988114684,M+DMSO+H,+,175.009335,4', - '175.01,HMDB0003745,Coenzyme m,C03576,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,4', - '175.01,HMDB0142159,[2-hydroxy-2-(2-hydroxyphenyl)-1-phenylethoxy]sulfonic acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142160,[2-hydroxy-1-(2-hydroxyphenyl)-2-phenylethoxy]sulfonic acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142162,"[3-(1,2-dihydroxy-2-phenylethyl)phenyl]oxidanesulfonic acid",n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142166,"[4-(1,2-dihydroxy-2-phenylethyl)phenyl]oxidanesulfonic acid",n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142164,[2-hydroxy-1-(3-hydroxyphenyl)-2-phenylethoxy]sulfonic acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142163,[2-hydroxy-2-(3-hydroxyphenyl)-1-phenylethoxy]sulfonic acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142167,[2-hydroxy-2-(4-hydroxyphenyl)-1-phenylethoxy]sulfonic 
acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0142168,[2-hydroxy-1-(4-hydroxyphenyl)-2-phenylethoxy]sulfonic acid,n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0129951,"{3-[2-(3,5-dihydroxyphenyl)ethyl]phenyl}oxidanesulfonic acid",n/a,C14H14O6S,310.051109345,M+H+K,+,175.010772,4', - '175.01,HMDB0031436,Silicic acid,n/a,H4O4Si,95.987885149,M+DMSO+H,+,175.009105,5', - '175.01,HMDB0033657,"11,15-Dihydroxy-6,8,20-trioxapentacyclo[10.8.0.0²,⁹.0³,⁷.0¹⁴,¹⁹]icosa-1(12),2(9),4,10,14,16,18-heptaen-13-one",C03683,C17H10O6,310.047738052,M+H+K,+,175.009086,5', - '175.01,HMDB0035230,"6,7-Dihydroxy-3-(4-hydroxyphenyl)-2H-furo[3,2-b]chromen-2-one",n/a,C17H10O6,310.047738052,M+H+K,+,175.009086,5', - '175.01,HMDB0155176,5-Hydroxy-6-hydrouracil,n/a,C4H6N2O3,130.037842061,M+2Na-H,+,175.009002,6', - '420.16,HMDB0060838,N-Desmethyl-p-hydroxyrosiglitazone,n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', - '420.16,HMDB0060836,N-Desmethyl-O-hydroxy rosiglitazone,n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', - '420.16,HMDB0160148,"6-[(5-carboxy-3-methylpentanoyl)oxy]-3,4,5-trihydroxyoxane-2-carboxylic acid",n/a,C13H20O10,336.105646844,M+IsoProp+Na+H,+,420.160757,2', - '420.16,HMDB0160149,"6-[(5-carboxy-4-methylpentanoyl)oxy]-3,4,5-trihydroxyoxane-2-carboxylic acid",n/a,C13H20O10,336.105646844,M+IsoProp+Na+H,+,420.160757,2', - '420.16,HMDB0165020,"6-{[2-(acetyloxy)-3-methylbutanoyl]oxy}-3,4,5-trihydroxyoxane-2-carboxylic acid",n/a,C13H20O10,336.105646844,M+IsoProp+Na+H,+,420.160757,2', - '420.16,HMDB0175397,"6-{[3-(acetyloxy)-3-methylbutanoyl]oxy}-3,4,5-trihydroxyoxane-2-carboxylic acid",n/a,C13H20O10,336.105646844,M+IsoProp+Na+H,+,420.160757,2', - '420.16,HMDB0184353,"6-[(4-carboxy-3,3-dimethylbutanoyl)oxy]-3,4,5-trihydroxyoxane-2-carboxylic acid",n/a,C13H20O10,336.105646844,M+IsoProp+Na+H,+,420.160757,2' + 'query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,"delta(ppm),",ccs_value', 
+ '175.01,HMDB0259477,"S-(N,N-Diethyldithiocarbamoyl)-N-acetyl-L-cysteine",C10H18N2O3S3,310.04795597,M+H+K,+,175.009195,5,', + '175.01,HMDB0258038,9-(Formyloxymethyl)-9H-fluorene-2-sulfonic acid,C15H12O5S,304.04054466,M+2Na,+,175.00949,3,', + '175.01,HMDB0257737,"4,5-Dihydroxy-3-methyl-1H-imidazol-2-one",C4H6N2O3,130.037842061,M+2Na-H,+,175.009002,6,', + '175.01,HMDB0240502,Dihydroresveratrol 4\'-sulfate,C14H14O6S,310.051109345,M+H+K,+,175.010772,4,', + '175.01,HMDB0240500,Dihydroresveratrol 3-sulfate,C14H14O6S,310.051109345,M+H+K,+,175.010772,4,', + '175.01,HMDB0246744,"5-Amino-1,3,4-thiadiazole-2-thiol",C2H3N3S2,132.976839457,M+ACN+H,+,175.010662,4,', + '175.01,HMDB0252977,Phosphoaminophosphonic acid-guanylate ester,C10H17N6O13P3,522.006645625,M+3H,+,175.009491,3,', + '175.01,HMDB0246800,5-Hydroxy-1-methylhydantoin,C4H6N2O3,130.037842061,M+2Na-H,+,175.009002,6,', + '175.01,HMDB0258677,Tafamidis,C14H7Cl2NO3,306.980298509,M+ACN+2H,+,175.010699,4,', + '175.01,HMDB0240280,Mesylate,CH4O3S,95.988114684,M+DMSO+H,+,175.009335,4,', + '175.01,HMDB0000257,Thiosulfate,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1,', + '175.01,HMDB0035230,Aurantricholide B,C17H10O6,310.047738052,M+H+K,+,175.009086,5,', + '175.01,HMDB0033657,De-O-methylsterigmatocystin,C17H10O6,310.047738052,M+H+K,+,175.009086,5,', + '175.01,HMDB0003745,Mesna,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,4,', + '420.16,HMDB0250452,Core oligosaccharide,C27H48O24,756.253552426,M+2ACN+2H,+,420.160599,1,', + '420.16,HMDB0245742,"N-(6,8-Difluoro-2-methyl-4-quinolinyl)-N\'-[4-(dimethylamino)phenyl]urea",C19H18F2N4O,356.144867542,M+ACN+Na,+,420.160633,2,', + '420.16,HMDB0256144,Pazopanib,C21H23N7O2S,437.163393705,M+H-H2O,+,420.160694,2,', + '420.16,HMDB0060838,N-Desmethyl-p-hydroxyrosiglitazone,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2,', + '420.16,HMDB0060836,N-Desmethyl-O-hydroxy rosiglitazone,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2,' ], - 'Method \'get_matches_from_hmdb_ua\' works with a 
well-formated string of mzs and return a complete csv from hmdb'); + 'Method \'getMatchesFromHmdb5WithUA\' works again (API v5.0 compatible) with a well-formated string of mzs and return a complete csv from hmdb'); print "\n** Test $current_test test_matches_from_hmdb_ua to get hmdb status **\n" ; $current_test++; - is_deeply (test_matches_from_hmdb_uaTest (), + is_deeply (testMatchesFromHmdb5WithUATest (), \'200', 'The HMDB server is available: returns successful HTTP requests' ) ; @@ -113,147 +81,144 @@ 1, 'The status 200 returns no error/warn' ) ; - print "\n** Test $current_test prepare_multi_masses_query with an empty list of mzs **\n" ; $current_test++; + print "\n** Test $current_test check_state_from_hmdb_ua to manage script execution with the hmdb server status **\n" ; $current_test++; throws_ok{ check_state_from_hmdb_uaTest(\'504')} - '/Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server/', + qr/Gateway Timeout \(504\): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server\. 
The Hmdb tool is stopped with error\./, 'Method \'check_state_from_hmdb_ua\' detects HTTP error code returned by HMDB and died correctly' ; - print "\n** Test $current_test parse_hmdb_csv_results with the correct inputs for hmdb outputs parsing (csv format) **\n" ; $current_test++; - is_deeply ( parse_hmdb_csv_resultsTest ( + + print "\n** Test $current_test parseHmdb5CSVResults (version 5.0) with the correct inputs for hmdb outputs parsing (csv format) **\n" ; $current_test++; + is_deeply ( parseHmdb5CSVResultsTest ( [ - 'query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)', -# '175.01,HMDB60293,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,0.000125', -# '175.01,HMDB03745,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,0.000676', -# '175.01,HMDB31436,H4O4Si,95.987885149,M+DMSO+H,+,175.009105,0.000895', -# '175.01,HMDB33657,C17H10O6,310.047738052,M+H+K,+,175.009086,0.000914', -# '175.01,HMDB35230,C17H10O6,310.047738052,M+H+K,+,175.009086,0.000914', -# '420.16,HMDB60838,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,0.000683', -# '420.16,HMDB60836,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,0.000683' - ## Update format - 2018-01-19... 
HMDB V04 - '175.01,HMDB0060293,Hydroxidodioxidosulfidosulfate,C05529,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1', - '175.01,HMDB0003745,Mesna,C03576,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,4', - '175.01,HMDB0031436,Silicic acid,n/a,H4O4Si,95.987885149,M+DMSO+H,+,175.009105,5', - '175.01,HMDB0033657,De-o-methylsterigmatocystin,C03683,C17H10O6,310.047738052,M+H+K,+,175.009086,5', - '175.01,HMDB0035230,"6,7-Dihydroxy-3-(4-hydroxyphenyl)furo[3,2-b]chromen-2-one",n/a,C17H10O6,310.047738052,M+H+K,+,175.009086,5', - '420.16,HMDB0060838,"4-hydroxy-5-[(4-{2-[(5-hydroxy-1,2-dihydropyridin-2-ylidene)amino]ethoxy}phenyl)methyl]-2,5-dihydro-1,3-thiazol-2-one",n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', - '420.16,HMDB0060836,N-Desmethyl O-hydroxyrosiglitazone,n/a,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2', + 'query_mass,compound_id,compound_name,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,"delta(ppm),",ccs_value', + '175.01,HMDB0259477,"S-(N,N-Diethyldithiocarbamoyl)-N-acetyl-L-cysteine",C10H18N2O3S3,310.04795597,M+H+K,+,175.009195,5,', + '175.01,HMDB0258038,9-(Formyloxymethyl)-9H-fluorene-2-sulfonic acid,C15H12O5S,304.04054466,M+2Na,+,175.00949,3,', + '175.01,HMDB0257737,"4,5-Dihydroxy-3-methyl-1H-imidazol-2-one",C4H6N2O3,130.037842061,M+2Na-H,+,175.009002,6,', + '175.01,HMDB0240502,Dihydroresveratrol 4\'-sulfate,C14H14O6S,310.051109345,M+H+K,+,175.010772,4,', + '175.01,HMDB0240500,Dihydroresveratrol 3-sulfate,C14H14O6S,310.051109345,M+H+K,+,175.010772,4,', + '175.01,HMDB0246744,"5-Amino-1,3,4-thiadiazole-2-thiol",C2H3N3S2,132.976839457,M+ACN+H,+,175.010662,4,', + '175.01,HMDB0252977,Phosphoaminophosphonic acid-guanylate ester,C10H17N6O13P3,522.006645625,M+3H,+,175.009491,3,', + '175.01,HMDB0246800,5-Hydroxy-1-methylhydantoin,C4H6N2O3,130.037842061,M+2Na-H,+,175.009002,6,', + '175.01,HMDB0258677,Tafamidis,C14H7Cl2NO3,306.980298509,M+ACN+2H,+,175.010699,4,', + 
'175.01,HMDB0240280,Mesylate,CH4O3S,95.988114684,M+DMSO+H,+,175.009335,4,', + '175.01,HMDB0000257,Thiosulfate,H2O3S2,113.94453531,M+IsoProp+H,+,175.009875,1,', + '175.01,HMDB0035230,Aurantricholide B,C17H10O6,310.047738052,M+H+K,+,175.009086,5,', + '175.01,HMDB0033657,De-O-methylsterigmatocystin,C17H10O6,310.047738052,M+H+K,+,175.009086,5,', + '175.01,HMDB0003745,Mesna,C2H6O3S2,141.975835438,M+CH3OH+H,+,175.009324,4,', + '420.16,HMDB0250452,Core oligosaccharide,C27H48O24,756.253552426,M+2ACN+2H,+,420.160599,1,', + '420.16,HMDB0245742,"N-(6,8-Difluoro-2-methyl-4-quinolinyl)-N\'-[4-(dimethylamino)phenyl]urea",C19H18F2N4O,356.144867542,M+ACN+Na,+,420.160633,2,', + '420.16,HMDB0256144,Pazopanib,C21H23N7O2S,437.163393705,M+H-H2O,+,420.160694,2,', + '420.16,HMDB0060838,N-Desmethyl-p-hydroxyrosiglitazone,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2,', + '420.16,HMDB0060836,N-Desmethyl-O-hydroxy rosiglitazone,C17H17N3O4S,359.093976737,M+IsoProp+H,+,420.159317,2,' ], ['175.01', '238.19', '420.16'], 10 ), [ [ - { 'ENTRY_CPD_MZ' => '113.94453531', 'ENTRY_ENTRY_ID' => 'HMDB0060293', 'ENTRY_FORMULA' => 'H2O3S2', 'ENTRY_ADDUCT_MZ' => '175.009875', 'ENTRY_DELTA' => '1', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT' => 'M+IsoProp+H' }, - {'ENTRY_DELTA' => '4','ENTRY_ADDUCT_TYPE' => '+','ENTRY_ADDUCT' => 'M+CH3OH+H','ENTRY_CPD_MZ' => '141.975835438','ENTRY_ENTRY_ID' => 'HMDB0003745','ENTRY_FORMULA' => 'C2H6O3S2','ENTRY_ADDUCT_MZ' => '175.009324'}, - {'ENTRY_ENTRY_ID' => 'HMDB0031436','ENTRY_CPD_MZ' => '95.987885149','ENTRY_ADDUCT_MZ' => '175.009105','ENTRY_FORMULA' => 'H4O4Si','ENTRY_DELTA' => '5','ENTRY_ADDUCT_TYPE' => '+','ENTRY_ADDUCT' => 'M+DMSO+H'}, - {'ENTRY_ADDUCT_MZ' => '175.009086','ENTRY_FORMULA' => 'C17H10O6','ENTRY_ENTRY_ID' => 'HMDB0033657','ENTRY_CPD_MZ' => '310.047738052','ENTRY_ADDUCT' => 'M+H+K','ENTRY_ADDUCT_TYPE' => '+','ENTRY_DELTA' => '5'}, - {'ENTRY_ADDUCT_MZ' => '175.009086','ENTRY_FORMULA' => 'C17H10O6','ENTRY_ENTRY_ID' => 'HMDB0035230','ENTRY_CPD_MZ' 
=> '310.047738052','ENTRY_ADDUCT_TYPE' => '+','ENTRY_ADDUCT' => 'M+H+K','ENTRY_DELTA' => '5'} + { 'ENTRY_CPD_MZ' => '310.04795597', 'ENTRY_DELTA' => '5', 'ENTRY_FORMULA' => 'C10H18N2O3S3', 'ENTRY_ADDUCT' => 'M+H+K', 'ENTRY_ENTRY_NAME' => '"S-(N,N-Diethyldithiocarbamoyl)-N-acetyl-L-cysteine"', 'ENTRY_ENTRY_ID' => 'HMDB0259477', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.009195' }, + { 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.00949', 'ENTRY_ENTRY_ID' => 'HMDB0258038', 'ENTRY_ENTRY_NAME' => '9-(Formyloxymethyl)-9H-fluorene-2-sulfonic acid', 'ENTRY_FORMULA' => 'C15H12O5S', 'ENTRY_ADDUCT' => 'M+2Na', 'ENTRY_DELTA' => '3', 'ENTRY_CPD_MZ' => '304.04054466' }, + { 'ENTRY_FORMULA' => 'C14H14O6S', 'ENTRY_ADDUCT' => 'M+H+K', 'ENTRY_ENTRY_NAME' => 'Dihydroresveratrol 4\'-sulfate', 'ENTRY_CPD_MZ' => '310.051109345', 'ENTRY_DELTA' => '4', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.010772', 'ENTRY_ENTRY_ID' => 'HMDB0240502' }, + { 'ENTRY_ADDUCT_MZ' => '175.010772', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ENTRY_ID' => 'HMDB0240500', 'ENTRY_ADDUCT' => 'M+H+K', 'ENTRY_FORMULA' => 'C14H14O6S', 'ENTRY_ENTRY_NAME' => 'Dihydroresveratrol 3-sulfate', 'ENTRY_CPD_MZ' => '310.051109345', 'ENTRY_DELTA' => '4' }, + { 'ENTRY_ENTRY_ID' => 'HMDB0246744', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.010662', 'ENTRY_CPD_MZ' => '132.976839457', 'ENTRY_DELTA' => '4', 'ENTRY_ENTRY_NAME' => '"5-Amino-1,3,4-thiadiazole-2-thiol"', 'ENTRY_FORMULA' => 'C2H3N3S2', 'ENTRY_ADDUCT' => 'M+ACN+H' }, + { 'ENTRY_ENTRY_ID' => 'HMDB0252977', 'ENTRY_ADDUCT_MZ' => '175.009491', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_CPD_MZ' => '522.006645625', 'ENTRY_DELTA' => '3', 'ENTRY_ADDUCT' => 'M+3H', 'ENTRY_FORMULA' => 'C10H17N6O13P3', 'ENTRY_ENTRY_NAME' => 'Phosphoaminophosphonic acid-guanylate ester' }, + { 'ENTRY_ENTRY_ID' => 'HMDB0258677', 'ENTRY_ADDUCT_MZ' => '175.010699', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_CPD_MZ' => '306.980298509', 'ENTRY_DELTA' => '4', 'ENTRY_ADDUCT' => 'M+ACN+2H', 
'ENTRY_FORMULA' => 'C14H7Cl2NO3', 'ENTRY_ENTRY_NAME' => 'Tafamidis' }, + { 'ENTRY_ENTRY_ID' => 'HMDB0240280', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.009335', 'ENTRY_DELTA' => '4', 'ENTRY_CPD_MZ' => '95.988114684', 'ENTRY_FORMULA' => 'CH4O3S', 'ENTRY_ADDUCT' => 'M+DMSO+H', 'ENTRY_ENTRY_NAME' => 'Mesylate' }, + { 'ENTRY_DELTA' => '1', 'ENTRY_CPD_MZ' => '113.94453531', 'ENTRY_FORMULA' => 'H2O3S2', 'ENTRY_ADDUCT' => 'M+IsoProp+H', 'ENTRY_ENTRY_NAME' => 'Thiosulfate', 'ENTRY_ENTRY_ID' => 'HMDB0000257', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.009875' }, + { 'ENTRY_DELTA' => '5', 'ENTRY_CPD_MZ' => '310.047738052', 'ENTRY_ENTRY_NAME' => 'Aurantricholide B', 'ENTRY_FORMULA' => 'C17H10O6', 'ENTRY_ADDUCT' => 'M+H+K', 'ENTRY_ENTRY_ID' => 'HMDB0035230', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '175.009086' } ], [], [ - {'ENTRY_FORMULA' => 'C17H17N3O4S','ENTRY_ADDUCT_MZ' => '420.159317','ENTRY_CPD_MZ' => '359.093976737','ENTRY_ENTRY_ID' => 'HMDB0060838','ENTRY_ADDUCT' => 'M+IsoProp+H','ENTRY_ADDUCT_TYPE' => '+','ENTRY_DELTA' => '2'}, - {'ENTRY_DELTA' => '2','ENTRY_ADDUCT_TYPE' => '+','ENTRY_ADDUCT' => 'M+IsoProp+H','ENTRY_ENTRY_ID' => 'HMDB0060836','ENTRY_CPD_MZ' => '359.093976737','ENTRY_ADDUCT_MZ' => '420.159317','ENTRY_FORMULA' => 'C17H17N3O4S'} + { 'ENTRY_ADDUCT_MZ' => '420.160599', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ENTRY_ID' => 'HMDB0250452', 'ENTRY_ADDUCT' => 'M+2ACN+2H', 'ENTRY_FORMULA' => 'C27H48O24', 'ENTRY_ENTRY_NAME' => 'Core oligosaccharide', 'ENTRY_CPD_MZ' => '756.253552426', 'ENTRY_DELTA' => '1' }, + { 'ENTRY_CPD_MZ' => '356.144867542', 'ENTRY_DELTA' => '2', 'ENTRY_ENTRY_NAME' => '"N-(6,8-Difluoro-2-methyl-4-quinolinyl)-N\'-[4-(dimethylamino)phenyl]urea"', 'ENTRY_FORMULA' => 'C19H18F2N4O', 'ENTRY_ADDUCT' => 'M+ACN+Na', 'ENTRY_ENTRY_ID' => 'HMDB0245742', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ADDUCT_MZ' => '420.160633' }, + { 'ENTRY_ADDUCT_MZ' => '420.159317', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ENTRY_ID' => 'HMDB0060838', 
'ENTRY_ADDUCT' => 'M+IsoProp+H', 'ENTRY_FORMULA' => 'C17H17N3O4S', 'ENTRY_ENTRY_NAME' => 'N-Desmethyl-p-hydroxyrosiglitazone', 'ENTRY_CPD_MZ' => '359.093976737', 'ENTRY_DELTA' => '2' }, + { 'ENTRY_ADDUCT_MZ' => '420.159317', 'ENTRY_ADDUCT_TYPE' => '+', 'ENTRY_ENTRY_ID' => 'HMDB0060836', 'ENTRY_ADDUCT' => 'M+IsoProp+H', 'ENTRY_FORMULA' => 'C17H17N3O4S', 'ENTRY_ENTRY_NAME' => 'N-Desmethyl-O-hydroxy rosiglitazone', 'ENTRY_DELTA' => '2', 'ENTRY_CPD_MZ' => '359.093976737' } ] ], - 'Method \'parse_hmdb_csv_results\' works with a well-formated csv output and returns a a well formated array' ) ; - - print "\n** Test $current_test parse_hmdb_csv_results with a void hmdb output and a list of mzs **\n" ; $current_test++; - is_deeply ( parse_hmdb_csv_resultsTest ( [], ['175.01', '238.19', '420.16'] ), - [ [], [], [] ], - 'Method \'parse_hmdb_csv_results\' works with a empty csv output and returns an empty but well formatted array' ) ; - - print "\n** Test $current_test parse_hmdb_csv_results with a void hmdb output and a void mz list **\n" ; $current_test++; - is_deeply ( parse_hmdb_csv_resultsTest ( [], [] ), - [], - 'Method \'parse_hmdb_csv_results\' works with a empty csv output/mz list and returns an empty but well formatted array' ) ; - - ## Other example... 
from mz 265.0567515 - print "\n** Test $current_test parse_hmdb_csv_results with mz 265.0567515 and the correct inputs for hmdb outputs parsing (csv format) **\n" ; $current_test++; - is_deeply ( parse_hmdb_csv_resultsTest ( - [ - 'query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)', - '265.0567515,HMDB0059667,3-Methylsulfolene,n/a,C5H8O2S,132.02450019,2M+H,+,265.056276,2', - '265.0567515,HMDB0038767,Camelliaside b,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', - '265.0567515,HMDB0039759,Kaempferol 3-(2g-apiosylrobinobioside),n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', - '265.0567515,HMDB0040875,Isoschaftoside 4\'-glucoside,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', - '265.0567515,HMDB0040878,Schaftoside 4\'-glucoside,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', - '265.0567515,HMDB0059600,erythro-5-Phosphonooxy-L-lysine,C03366,C6H15N2O6P,242.066772734,M+Na,+,265.055991,3', - '265.0567515,HMDB0061174,"2,4-Imidazolidinedione, 3-ethyl-5-(4-hydroxyphenyl)-",n/a,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', - '265.0567515,HMDB0015571,Oxitriptan,C00643,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', - '265.0567515,HMDB0000472,5-hydroxy-l-tryptophan,C01017,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', - '265.0567515,HMDB0030584,Silidianin,n/a,C25H24O10,484.136946988,M+2Na,+,265.057691,4', - ], - ['265.0567515'], - 5 - ), - [ - [ - { - 'ENTRY_DELTA' => '2', - 'ENTRY_FORMULA' => 'C5H8O2S', - 'ENTRY_ENTRY_ID' => 'HMDB0059667', - 'ENTRY_ADDUCT_MZ' => '265.056276', - 'ENTRY_ADDUCT_TYPE' => '+', - 'ENTRY_CPD_MZ' => '132.02450019', - 'ENTRY_ADDUCT' => '2M+H' - }, - { - 'ENTRY_ADDUCT_MZ' => '265.056128', - 'ENTRY_ENTRY_ID' => 'HMDB0038767', - 'ENTRY_ADDUCT_TYPE' => '+', - 'ENTRY_CPD_MZ' => '726.200729034', - 'ENTRY_ADDUCT' => 'M+3Na', - 'ENTRY_DELTA' => '2', - 'ENTRY_FORMULA' => 'C32H38O19' - }, - { - 'ENTRY_DELTA' => '2', - 'ENTRY_FORMULA' => 'C32H38O19', - 'ENTRY_ENTRY_ID' => 
'HMDB0039759', - 'ENTRY_ADDUCT_MZ' => '265.056128', - 'ENTRY_ADDUCT_TYPE' => '+', - 'ENTRY_CPD_MZ' => '726.200729034', - 'ENTRY_ADDUCT' => 'M+3Na' - }, - { - 'ENTRY_FORMULA' => 'C32H38O19', - 'ENTRY_DELTA' => '2', - 'ENTRY_ADDUCT_TYPE' => '+', - 'ENTRY_ADDUCT_MZ' => '265.056128', - 'ENTRY_ENTRY_ID' => 'HMDB0040875', - 'ENTRY_CPD_MZ' => '726.200729034', - 'ENTRY_ADDUCT' => 'M+3Na' - }, - { - 'ENTRY_ADDUCT_TYPE' => '+', - 'ENTRY_ENTRY_ID' => 'HMDB0040878', - 'ENTRY_ADDUCT_MZ' => '265.056128', - 'ENTRY_ADDUCT' => 'M+3Na', - 'ENTRY_CPD_MZ' => '726.200729034', - 'ENTRY_FORMULA' => 'C32H38O19', - 'ENTRY_DELTA' => '2' - } - ] - ], - 'Method \'parse_hmdb_csv_results\' works with a well-formated csv output and returns a a well formated array' ) ; - - -} + 'Method \'parseHmdb5CSVResults\' works with a well-formated csv output and returns a a well formated array' ) ; - - - - - - - +# print "\n** Test $current_test parse_hmdb_csv_results with a void hmdb output and a list of mzs **\n" ; $current_test++; +# is_deeply ( parse_hmdb_csv_resultsTest ( [], ['175.01', '238.19', '420.16'] ), +# [ [], [], [] ], +# 'Method \'parse_hmdb_csv_results\' works with a empty csv output and returns an empty but well formatted array' ) ; +# +# print "\n** Test $current_test parse_hmdb_csv_results with a void hmdb output and a void mz list **\n" ; $current_test++; +# is_deeply ( parse_hmdb_csv_resultsTest ( [], [] ), +# [], +# 'Method \'parse_hmdb_csv_results\' works with a empty csv output/mz list and returns an empty but well formatted array' ) ; +# +# ## Other example... 
from mz 265.0567515 +# print "\n** Test $current_test parse_hmdb_csv_results with mz 265.0567515 and the correct inputs for hmdb outputs parsing (csv format) **\n" ; $current_test++; +# is_deeply ( parse_hmdb_csv_resultsTest ( +# [ +# 'query_mass,compound_id,compound_name,kegg_id,formula,monoisotopic_mass,adduct,adduct_type,adduct_m/z,delta(ppm)', +# '265.0567515,HMDB0059667,3-Methylsulfolene,n/a,C5H8O2S,132.02450019,2M+H,+,265.056276,2', +# '265.0567515,HMDB0038767,Camelliaside b,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', +# '265.0567515,HMDB0039759,Kaempferol 3-(2g-apiosylrobinobioside),n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', +# '265.0567515,HMDB0040875,Isoschaftoside 4\'-glucoside,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', +# '265.0567515,HMDB0040878,Schaftoside 4\'-glucoside,n/a,C32H38O19,726.200729034,M+3Na,+,265.056128,2', +# '265.0567515,HMDB0059600,erythro-5-Phosphonooxy-L-lysine,C03366,C6H15N2O6P,242.066772734,M+Na,+,265.055991,3', +# '265.0567515,HMDB0061174,"2,4-Imidazolidinedione, 3-ethyl-5-(4-hydroxyphenyl)-",n/a,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', +# '265.0567515,HMDB0015571,Oxitriptan,C00643,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', +# '265.0567515,HMDB0000472,5-hydroxy-l-tryptophan,C01017,C11H12N2O3,220.08479226,M+2Na-H,+,265.055952,3', +# '265.0567515,HMDB0030584,Silidianin,n/a,C25H24O10,484.136946988,M+2Na,+,265.057691,4', +# ], +# ['265.0567515'], +# 5 +# ), +# [ +# [ +# { +# 'ENTRY_DELTA' => '2', +# 'ENTRY_FORMULA' => 'C5H8O2S', +# 'ENTRY_ENTRY_ID' => 'HMDB0059667', +# 'ENTRY_ADDUCT_MZ' => '265.056276', +# 'ENTRY_ADDUCT_TYPE' => '+', +# 'ENTRY_CPD_MZ' => '132.02450019', +# 'ENTRY_ADDUCT' => '2M+H' +# }, +# { +# 'ENTRY_ADDUCT_MZ' => '265.056128', +# 'ENTRY_ENTRY_ID' => 'HMDB0038767', +# 'ENTRY_ADDUCT_TYPE' => '+', +# 'ENTRY_CPD_MZ' => '726.200729034', +# 'ENTRY_ADDUCT' => 'M+3Na', +# 'ENTRY_DELTA' => '2', +# 'ENTRY_FORMULA' => 'C32H38O19' +# }, +# { +# 'ENTRY_DELTA' => '2', +# 'ENTRY_FORMULA' => 
'C32H38O19', +# 'ENTRY_ENTRY_ID' => 'HMDB0039759', +# 'ENTRY_ADDUCT_MZ' => '265.056128', +# 'ENTRY_ADDUCT_TYPE' => '+', +# 'ENTRY_CPD_MZ' => '726.200729034', +# 'ENTRY_ADDUCT' => 'M+3Na' +# }, +# { +# 'ENTRY_FORMULA' => 'C32H38O19', +# 'ENTRY_DELTA' => '2', +# 'ENTRY_ADDUCT_TYPE' => '+', +# 'ENTRY_ADDUCT_MZ' => '265.056128', +# 'ENTRY_ENTRY_ID' => 'HMDB0040875', +# 'ENTRY_CPD_MZ' => '726.200729034', +# 'ENTRY_ADDUCT' => 'M+3Na' +# }, +# { +# 'ENTRY_ADDUCT_TYPE' => '+', +# 'ENTRY_ENTRY_ID' => 'HMDB0040878', +# 'ENTRY_ADDUCT_MZ' => '265.056128', +# 'ENTRY_ADDUCT' => 'M+3Na', +# 'ENTRY_CPD_MZ' => '726.200729034', +# 'ENTRY_FORMULA' => 'C32H38O19', +# 'ENTRY_DELTA' => '2' +# } +# ] +# ], +# 'Method \'parse_hmdb_csv_results\' works with a well-formated csv output and returns a a well formated array' ) ; +# +# - - - - - - - +} ## END of the script \ No newline at end of file
--- a/t/lib/hmdbTest.pm Fri Nov 20 17:29:18 2020 +0000 +++ b/t/lib/hmdbTest.pm Thu May 19 13:43:09 2022 +0000 @@ -8,8 +8,8 @@ our $VERSION = "1.0"; our @ISA = qw(Exporter); -our @EXPORT = qw( parse_hmdb_csv_resultsTest check_state_from_hmdb_uaTest test_matches_from_hmdb_uaTest extract_sub_mz_listsTest prepare_multi_masses_queryTest get_matches_from_hmdb_uaTest); -our %EXPORT_TAGS = ( ALL => [qw( parse_hmdb_csv_resultsTest check_state_from_hmdb_uaTest test_matches_from_hmdb_uaTest extract_sub_mz_listsTest prepare_multi_masses_queryTest get_matches_from_hmdb_uaTest)] ); +our @EXPORT = qw( testMatchesFromHmdb5WithUATest parseHmdb5CSVResultsTest getMatchesFromHmdb5WithUATest parse_hmdb_csv_resultsTest check_state_from_hmdb_uaTest test_matches_from_hmdb_uaTest extract_sub_mz_listsTest prepare_multi_masses_queryTest get_matches_from_hmdb_uaTest); +our %EXPORT_TAGS = ( ALL => [qw( testMatchesFromHmdb5WithUATest parseHmdb5CSVResultsTest getMatchesFromHmdb5WithUATest parse_hmdb_csv_resultsTest check_state_from_hmdb_uaTest test_matches_from_hmdb_uaTest extract_sub_mz_listsTest prepare_multi_masses_queryTest get_matches_from_hmdb_uaTest)] ); use lib '/Users/fgiacomoni/Inra/labs/perl/galaxy_tools/hmdb' ; use lib::hmdb qw( :ALL ) ; @@ -49,6 +49,17 @@ return ($hmdb_pages) ; } +## sub getMatchesFromHmdb5WithUATest : test wrapper - forwards the masses, delta and molecular species to lib::hmdb::getMatchesFromHmdb5WithUA, dumps the returned pages and returns them (the returned status is discarded) +sub getMatchesFromHmdb5WithUATest { + + my ( $hmdb_masses, $delta, $molecular_species ) = @_ ; + + my $oHmdb = lib::hmdb->new() ; + my ($hmdb_pages, $status) = $oHmdb->getMatchesFromHmdb5WithUA($hmdb_masses, $delta, $molecular_species) ; + print Dumper $hmdb_pages ; + return ($hmdb_pages) ; +} + ## sub sub test_matches_from_hmdb_uaTest { @@ -58,6 +69,13 @@ return ($status) ; } +## sub testMatchesFromHmdb5WithUATest : test wrapper - calls lib::hmdb::testMatchesFromHmdb5WithUA without argument and returns its status +sub testMatchesFromHmdb5WithUATest { + + my $oHmdb = lib::hmdb->new() ; + my $status = $oHmdb->testMatchesFromHmdb5WithUA() ; + return ($status) ; +} ## sub sub check_state_from_hmdb_uaTest { @@ -69,6 +87,15 @@ } +## sub parseHmdb5CSVResultsTest : test wrapper - forwards the hmdb pages, the mz list and the max value to lib::hmdb::parseHmdb5CSVResults and returns its result +sub parseHmdb5CSVResultsTest { + my ($hmdb_pages, $mzs, $max ) = @_ ; + + my $oHmdb = 
lib::hmdb->new() ; + my $result = $oHmdb->parseHmdb5CSVResults($hmdb_pages, $mzs, $max) ; ## hash format result + #print Dumper $result ; + return($result) ; +} ## sub sub parse_hmdb_csv_resultsTest {
--- a/wsdl_hmdb.pl Fri Nov 20 17:29:18 2020 +0000 +++ b/wsdl_hmdb.pl Thu May 19 13:43:09 2022 +0000 @@ -49,7 +49,7 @@ "output_tabular:s" => \$out_tab, ## option : path to the ouput (tabular : input+results ) "output_html|v:s" => \$out_html, ## option : path to the results view (output2) "output_xlsx:s" => \$out_xls, ## option : path to the xls-like format output - "advancedFeatures:i"=> \$advancedFeatures, ## option : set to 1 to get advanced options or 0 to get first level only. + #"advancedFeatures:i"=> \$advancedFeatures, ## option : set to 1 to get advanced options or 0 to get first level only. "verbose:i" => \$VERBOSE, ## VERBOSE Of the tool ) ; @@ -137,7 +137,7 @@ print "\tand ".scalar(@$masses)." masses are submitted as ".scalar(@$submasses)." queries to HMDB \n\n" if ($VERBOSE>1) ; ## get the hmdb server status by a test query - continuous queries or kill script. - $status = $oHmdb->testMatchesFromHmdbWithUA() ; + $status = $oHmdb->testMatchesFromHmdb5WithUA() ; $oHmdb->check_state_from_hmdb_ua($status) ; ## can kill the script execution my $cluster = 1 ; @@ -151,7 +151,7 @@ print "\n\tSubmission of m/z cluster ".sprintf '%04s',$cluster."" if ($VERBOSE>1) ; - ($hmdb_pages, $status) = $oHmdb->getMatchesFromHmdbWithUA($hmdb_masses, $delta, $molecular_species, $hmdb_adducts) ; + ($hmdb_pages, $status) = $oHmdb->getMatchesFromHmdb5WithUA($hmdb_masses, $delta, $molecular_species, $hmdb_adducts) ; print "...HMDB reply results with status: $status\n" if ($VERBOSE>1) ; # print Dumper $hmdb_pages ; @@ -160,7 +160,7 @@ ## hard modification with $max_query fixed at 1000 !!! Need to be refactoring ## Cutof will be done in next method after URI check - ($result) = $oHmdb->parse_hmdb_csv_results($hmdb_pages, $mzs, 1000) ; ## hash format result + ($result) = $oHmdb->parseHmdb5CSVResults($hmdb_pages, $mzs, 1000) ; ## hash format result - the pages fetched above are parsed here (parser method, not the query method) ## This previous step return results with cutoff on the number of entries returned ! 
@@ -215,6 +215,10 @@ my ($tbody_object) = $oHtml->set_html_tbody_object( $nb_pages_for_html_out, $CONF->{HTML_ENTRIES_PER_PAGE} ) ; ($tbody_object) = $oHtml->add_mz_to_tbody_object($tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $masses, $ids) ; ($tbody_object) = $oHtml->add_entries_to_tbody_object($tbody_object, $CONF->{HTML_ENTRIES_PER_PAGE}, $masses, $results) ; + + # print Dumper $tbody_object ; ## debug only - kept disabled so the tbody object is not dumped at each HTML export + + my $output_html = $oHtml->write_html_skel(\$out_html, $tbody_object, $nb_pages_for_html_out, $search_condition, $CONF->{'HTML_TEMPLATE'}, $CONF->{'JS_GALAXY_PATH'}, $CONF->{'CSS_GALAXY_PATH'}) ; } ## END IF @@ -305,7 +309,7 @@ =head1 SYNOPSIS -This script manages batch queries on HMDB server. +This script manages batch queries on HMDB server (v5.0). =head1 DESCRIPTION @@ -343,4 +347,6 @@ version 1.6.1 : 30 / 01 / 2019 - Adding adducts and fixxing minors bugs and requirements +version 1.7.0 : 19 / 05 / 2022 - Update HMDB API client - compliant with HMDB 5.0 web portal + =cut \ No newline at end of file
--- a/wsdl_hmdb.xml Fri Nov 20 17:29:18 2020 +0000 +++ b/wsdl_hmdb.xml Thu May 19 13:43:09 2022 +0000 @@ -1,4 +1,4 @@ -<tool id="wsdl_hmdb" name="HMDB MS search" version="1.6.1"> +<tool id="wsdl_hmdb" name="HMDB MS search" version="1.7.0"> <description> search by masses on HMDB online LCMS bank </description> @@ -243,7 +243,7 @@ .. class:: infomark -**Authors** Marion Landi and Franck Giacomoni +**Authors** Marion Landi, Yann Guitton and Franck Giacomoni --------------------------------------------------- @@ -252,6 +252,7 @@ **Please cite** If you use this tool, please add the following reference | Wishart DS, Jewison T, Guo AC, Wilson M, Knox C, et al., HMDB 3.0 — The Human Metabolome Database in 2013. Nucleic Acids Res. 2013. | Wishart DS, Feunang YD, Marcu A, Guo AC, Liang K, et al., HMDB 4.0 — The Human Metabolome Database for 2018. Nucleic Acids Res. 2018. + | Wishart DS, et al., HMDB 5.0 — The Human Metabolome Database for 2022. Nucleic Acids Res. 2022. --------------------------------------------------- @@ -376,7 +377,7 @@ .. class:: warningmark -And their "W4M courses 2018": +And their "W4M courses 2021": | Using Galaxy4Metabolomics - W4M table format for Galaxy | Annotation Banks - Annotation @@ -386,5 +387,6 @@ <citations> <citation type="doi">10.1093/nar/gks1065</citation> <citation type="doi">10.1093/nar/gkx1089</citation> + <citation type="doi">10.1093/nar/gkab1062</citation> </citations> </tool> \ No newline at end of file