comparison lib/hmdb.pm @ 20:b5a1d5e43685 draft

Master branch Updating - - Fxx
author fgiacomoni
date Wed, 23 Jan 2019 07:49:34 -0500
parents 49f87ddb2c78
children 63ba1cb240b7
comparison
equal deleted inserted replaced
19:f8c8a990688a 20:b5a1d5e43685
5 use Exporter ; 5 use Exporter ;
6 use Carp ; 6 use Carp ;
7 7
8 use LWP::Simple; 8 use LWP::Simple;
9 use LWP::UserAgent; 9 use LWP::UserAgent;
10 use WWW::Mechanize qw();
10 use URI::URL; 11 use URI::URL;
11 use SOAP::Lite; 12 use SOAP::Lite;
12 use Encode; 13 use Encode qw(encode_utf8);
13 use HTML::Template ; 14 use HTML::Template ;
14 use XML::Twig ; 15 use XML::Twig ;
15 use Text::CSV ; 16 use Text::CSV ;
16 17
17 use Data::Dumper ; 18 use Data::Dumper ;
141 } 142 }
142 return($hmdb_masses, $nb_masses) ; 143 return($hmdb_masses, $nb_masses) ;
143 } 144 }
144 ## END of SUB 145 ## END of SUB
145 146
146 =head2 METHOD test_matches_from_hmdb_ua 147 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED
148
149 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra.
150 ## Input : none
151 ## Output : $status_line
152 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ;
153
154 =cut
155 ## START of SUB
156 sub test_matches_from_hmdb_ua {
157 ## Retrieve Values
158 my $self = shift ;
159
160 my @page = () ;
161
162 my $ua = new LWP::UserAgent;
163 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0");
164
165 my $url = 'http://specdb.wishartlab.com/ms/search.csv';
166 my $header = ['Connection' => 'keep-alive', 'Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'];
167
168 my $req = HTTP::Request->new('POST', $url, $header);
169 # print Dumper $req ;
170
171 ## Query format for HMDB 4.0
172 #"utf8=✓&authenticity_token=K1Ys7oyMKmVNm9n8p0jiTxBlh4G4OO0cqKZYnQKDCw0pM6zmm/CiBxv+/cXhuRsVFV98LLeAMJRN5dCyhIWlAA==&query_masses=175.01 238.19 420.16 780.32 956.25 1100.45&ms_search_ion_mode=positive&adduct_type[]=Unknown&tolerance=0.05&tolerance_units=Da&commit=Search"
173
174 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV');
175 # print Dumper $req ;
176 my $res = $ua->request($req);
177
178 print $res->as_string;
179 my $status_line = $res->status_line ;
180 ($status_line) = ($status_line =~ /(\d+)/);
181
182 return (\$status_line) ;
183 }
184 ## END of SUB
185
186 =head2 METHOD testMatchesFromHmdbWithUA
147 187
148 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. 188 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra.
149 ## Input : none 189 ## Input : none
150 ## Output : $status_line 190 ## Output : $status_line
151 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ; 191 ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ;
152 192
153 =cut 193 =cut
154 ## START of SUB 194 ## START of SUB
155 sub test_matches_from_hmdb_ua { 195 sub testMatchesFromHmdbWithUA {
156 ## Retrieve Values 196 ## Retrieve Values
157 my $self = shift ; 197 my $self = shift ;
158 198
159 my @page = () ; 199 my @page = () ;
160 200 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
161 my $ua = new LWP::UserAgent; 201
162 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); 202 my $mech = WWW::Mechanize->new(
163 203 agent => 'wonderbot for W4M 1.01',
164 my $req = HTTP::Request->new( 204 autocheck => 1,
165 POST => 'http://specdb.wishartlab.com/ms/search.csv'); 205 );
166 206
167 $req->content_type('application/x-www-form-urlencoded'); 207 my $statusGetLine = 0 ;
168 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'); 208 my $statusPostLine = 0 ;
169 209
170 my $res = $ua->request($req); 210 #receiving cookies and authentication token (CFRS)
171 # print $res->as_string; 211 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search");
172 my $status_line = $res->status_line ; 212 $statusGetLine = $mech->status() ;
173 ($status_line) = ($status_line =~ /(\d+)/); 213
174 214 if ($statusGetLine == 200 ) {
175 215 die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
176 return (\$status_line) ; 216 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
177 } 217 my $csrftoken = $1;
178 ## END of SUB 218 # print "\nTOKEN: $csrftoken\n" ;
219 $mech->add_header("X-CSRFToken", $csrftoken);
220 $mech->add_header('Connection', 'keep-alive');
221 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
222 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search');
223 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
224 }
225
226 ## POST test
227 my $top = 1 ;
228 while ($statusPostLine != 200 ) {
229
230 # Fix a limit at 3 tries...
231 if ($top < 4) {
232 print "\tTesting HMDB server connexion ($top time(s) )...\n" ;
233 $mech->post(
234 "http://specdb.wishartlab.com/ms/search.csv",
235 Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'
236 );
237
238 # print Dumper $mech ;
239 $statusPostLine = $mech->status() ;
240 }
241 else {
242 last ;
243 }
244 $top++ ;
245 }## End While
246
247 return (\$statusPostLine) ;
248 }
249 ## END of SUB
250
251
252
179 253
180 =head2 METHOD check_state_from_hmdb_ua 254 =head2 METHOD check_state_from_hmdb_ua
181 255
182 ## Description : check the thhp status of hmdb and kill correctly the script if necessary. 256 ## Description : check the thhp status of hmdb and kill correctly the script if necessary.
183 ## Input : $status 257 ## Input : $status
194 if (!defined $$status) { 268 if (!defined $$status) {
195 croak "No http status is defined for the distant server" ; 269 croak "No http status is defined for the distant server" ;
196 } 270 }
197 else { 271 else {
198 unless ( $$status == 200 ) { 272 unless ( $$status == 200 ) {
199 if ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; } 273 if ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; }
200 if ( $$status == 500 ) { croak "Internal Server Error: The HMDB server returns an unexpected internal server error" ; } 274 if ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; }
275 if ( $$status == 500 ) { croak "Internal Server Error (500): The HMDB server returns an unexpected internal server error. The Hmdb tool is stopped with error." ; }
201 else { 276 else {
202 ## None supported http code error ## 277 ## None supported http code error ##
203 croak "Internal Server Error $$status..." ; 278 croak "Internal Server Error $$status..." ;
204 } 279 }
205 } 280 }
208 283
209 return (1) ; 284 return (1) ;
210 } 285 }
211 ## END of SUB 286 ## END of SUB
212 287
213 =head2 METHOD get_matches_from_hmdb_ua 288 =head2 METHOD get_matches_from_hmdb_ua DEPRECATED
214 289
215 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) 290 ## Description : [DEPRECATED]HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
216 ## Input : $mass, $delta, $mode 291 ## Input : $mass, $delta, $mode
217 ## Output : $results 292 ## Output : $results
218 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; 293 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;
219 294
220 =cut 295 =cut
225 my ( $masses, $delta, $mode ) = @_ ; 300 my ( $masses, $delta, $mode ) = @_ ;
226 301
227 my @page = () ; 302 my @page = () ;
228 303
229 my $ua = LWP::UserAgent->new( keep_alive => 10 ); 304 my $ua = LWP::UserAgent->new( keep_alive => 10 );
230 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); 305 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0");
231 $ua->timeout(2400) ; 306 $ua->timeout(2400) ;
307
308 # Cookies
309 # my $cookie = new HTTP::Cookies( ignore_discard => 1 );
310 # $ua->cookie_jar( $cookie );
232 311
233 my $req = HTTP::Request->new( 312 # my $req = HTTP::Request->new(
234 POST => 'http://specdb.wishartlab.com/ms/search.csv'); 313 # POST => 'http://specdb.wishartlab.com/ms/search.csv');
235 314
236 $req->content_type('application/x-www-form-urlencoded'); 315 my $url = 'http://specdb.wishartlab.com/ms/search.csv';
316 my $header = ['Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'];
317 my $data = {mode => $mode, query_masses => $masses, 'tolerance' => $delta, database => 'HMDB', commit => 'Download Results As CSV'};
318 my $encoded_data = encode_utf8($data);
319
320 my $req = HTTP::Request->new('POST', $url, $header, $encoded_data);
321
322 # $req->content_type('application/x-www-form-urlencoded');
237 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); 323 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
238 # print Dumper $req ; 324 # print Dumper $req ;
239 my $res = $ua->request($req); 325 my $res = $ua->request($req);
240 my $status_line = undef ; 326 my $status_line = undef ;
241 # print $res->as_string; 327 print $res->as_string;
242 if ($res->is_success) { 328 if ($res->is_success) {
243 @page = split ( /\n/, $res->decoded_content ) ; 329 @page = split ( /\n/, $res->decoded_content ) ;
244 $status_line = 'OK' ; 330 $status_line = 'OK' ;
245 } else { 331 } else {
246 $status_line = $res->status_line ; 332 $status_line = $res->status_line ;
250 336
251 337
252 return (\@page, $status_line) ; 338 return (\@page, $status_line) ;
253 } 339 }
254 ## END of SUB 340 ## END of SUB
341
342 =head2 METHOD getMatchesFromHmdbWithUA
343
344 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
345 ## Input : $mass, $delta, $mode
346 ## Output : $results
347 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ;
348
349 =cut
350 ## START of SUB
351 sub getMatchesFromHmdbWithUA {
352 ## Retrieve Values
353 my $self = shift ;
354 my ( $masses, $delta, $mode ) = @_ ;
355
356 my @page = () ;
357
358 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and
359
360 my $mech = WWW::Mechanize->new(
361 agent => 'wonderbot for W4M 1.01',
362 autocheck => 1,
363 timeout => 2400,
364 );
365
366 my $statusGetLine = 0 ;
367 my $statusPostLine = 0 ;
368
369 #receiving cookies and authentication token (CFRS)
370 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search");
371 $statusGetLine = $mech->status() ;
372
373 if ($statusGetLine == 200 ) {
374 die 'no CSRF_REQUEST_TOKEN_VALUE in page found'
375 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ;
376 my $csrftoken = $1;
377 # print "\nTOKEN: $csrftoken\n" ;
378 $mech->add_header("X-CSRFToken", $csrftoken);
379 $mech->add_header('Connection', 'keep-alive');
380 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded');
381 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search');
382 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
383 }
384
385 my $res = $mech->post(
386 "http://specdb.wishartlab.com/ms/search.csv",
387 Content => 'utf8=TRUE&mode='
388 .$mode.'&query_masses='
389 .$masses.'&tolerance='
390 .$delta.'&database=HMDB&commit=Download Results As CSV'
391 );
392
393 if ($mech->success) {
394 @page = split ( /\n/, $res->decoded_content ) ;
395 $statusPostLine = 'OK' ;
396 }
397 else {
398 $statusPostLine = $mech->status() ;
399 warn "\t[HMDB service issue !! the server returned a $statusPostLine HTTP error]" ;
400 }
401
402 # print Dumper $res->decoded_content ;
403
404 return (\@page, $statusPostLine) ;
405 }
406 ## END of SUB
407
255 408
256 =head2 METHOD parse_hmdb_csv_results 409 =head2 METHOD parse_hmdb_csv_results
257 410
258 ## Description : parse the csv results and get data 411 ## Description : parse the csv results and get data
259 ## Input : $csv 412 ## Input : $csv