Mercurial > repos > fgiacomoni > hmdb_ms_search
comparison lib/hmdb.pm @ 20:b5a1d5e43685 draft
Master branch Updating - - Fxx
author | fgiacomoni |
---|---|
date | Wed, 23 Jan 2019 07:49:34 -0500 |
parents | 49f87ddb2c78 |
children | 63ba1cb240b7 |
comparison
equal
deleted
inserted
replaced
19:f8c8a990688a | 20:b5a1d5e43685 |
---|---|
5 use Exporter ; | 5 use Exporter ; |
6 use Carp ; | 6 use Carp ; |
7 | 7 |
8 use LWP::Simple; | 8 use LWP::Simple; |
9 use LWP::UserAgent; | 9 use LWP::UserAgent; |
10 use WWW::Mechanize qw(); | |
10 use URI::URL; | 11 use URI::URL; |
11 use SOAP::Lite; | 12 use SOAP::Lite; |
12 use Encode; | 13 use Encode qw(encode_utf8); |
13 use HTML::Template ; | 14 use HTML::Template ; |
14 use XML::Twig ; | 15 use XML::Twig ; |
15 use Text::CSV ; | 16 use Text::CSV ; |
16 | 17 |
17 use Data::Dumper ; | 18 use Data::Dumper ; |
141 } | 142 } |
142 return($hmdb_masses, $nb_masses) ; | 143 return($hmdb_masses, $nb_masses) ; |
143 } | 144 } |
144 ## END of SUB | 145 ## END of SUB |
145 | 146 |
146 =head2 METHOD test_matches_from_hmdb_ua | 147 =head2 METHOD test_matches_from_hmdb_ua DEPRECATED |
148 | |
149 ## Description : [DEPRECATED]test a single query with tests parameters on hmdb - get the status of the complete server infra. | |
150 ## Input : none | |
151 ## Output : $status_line | |
152 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ; | |
153 | |
154 =cut | |
155 ## START of SUB | |
156 sub test_matches_from_hmdb_ua { | |
157 ## Retrieve Values | |
158 my $self = shift ; | |
159 | |
160 my @page = () ; | |
161 | |
162 my $ua = new LWP::UserAgent; | |
163 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); | |
164 | |
165 my $url = 'http://specdb.wishartlab.com/ms/search.csv'; | |
166 my $header = ['Connection' => 'keep-alive', 'Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8']; | |
167 | |
168 my $req = HTTP::Request->new('POST', $url, $header); | |
169 # print Dumper $req ; | |
170 | |
171 ## Query format for HMDB 4.0 | |
172 #"utf8=✓&authenticity_token=K1Ys7oyMKmVNm9n8p0jiTxBlh4G4OO0cqKZYnQKDCw0pM6zmm/CiBxv+/cXhuRsVFV98LLeAMJRN5dCyhIWlAA==&query_masses=175.01 238.19 420.16 780.32 956.25 1100.45&ms_search_ion_mode=positive&adduct_type[]=Unknown&tolerance=0.05&tolerance_units=Da&commit=Search" | |
173 | |
174 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'); | |
175 # print Dumper $req ; | |
176 my $res = $ua->request($req); | |
177 | |
178 print $res->as_string; | |
179 my $status_line = $res->status_line ; | |
180 ($status_line) = ($status_line =~ /(\d+)/); | |
181 | |
182 return (\$status_line) ; | |
183 } | |
184 ## END of SUB | |
185 | |
186 =head2 METHOD testMatchesFromHmdbWithUA | |
147 | 187 |
148 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. | 188 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra. |
149 ## Input : none | 189 ## Input : none |
150 ## Output : $status_line | 190 ## Output : $status_line |
151 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ; | 191 ## Usage : my ( $status_line ) = testMatchesFromHmdbWithUA( ) ; |
152 | 192 |
153 =cut | 193 =cut |
154 ## START of SUB | 194 ## START of SUB |
155 sub test_matches_from_hmdb_ua { | 195 sub testMatchesFromHmdbWithUA { |
156 ## Retrieve Values | 196 ## Retrieve Values |
157 my $self = shift ; | 197 my $self = shift ; |
158 | 198 |
159 my @page = () ; | 199 my @page = () ; |
160 | 200 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and |
161 my $ua = new LWP::UserAgent; | 201 |
162 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); | 202 my $mech = WWW::Mechanize->new( |
163 | 203 agent => 'wonderbot for W4M 1.01', |
164 my $req = HTTP::Request->new( | 204 autocheck => 1, |
165 POST => 'http://specdb.wishartlab.com/ms/search.csv'); | 205 ); |
166 | 206 |
167 $req->content_type('application/x-www-form-urlencoded'); | 207 my $statusGetLine = 0 ; |
168 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV'); | 208 my $statusPostLine = 0 ; |
169 | 209 |
170 my $res = $ua->request($req); | 210 #receiving cookies and authentication token (CFRS) |
171 # print $res->as_string; | 211 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); |
172 my $status_line = $res->status_line ; | 212 $statusGetLine = $mech->status() ; |
173 ($status_line) = ($status_line =~ /(\d+)/); | 213 |
174 | 214 if ($statusGetLine == 200 ) { |
175 | 215 die 'no CSRF_REQUEST_TOKEN_VALUE in page found' |
176 return (\$status_line) ; | 216 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; |
177 } | 217 my $csrftoken = $1; |
178 ## END of SUB | 218 # print "\nTOKEN: $csrftoken\n" ; |
219 $mech->add_header("X-CSRFToken", $csrftoken); | |
220 $mech->add_header('Connection', 'keep-alive'); | |
221 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); | |
222 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); | |
223 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); | |
224 } | |
225 | |
226 ## POST test | |
227 my $top = 1 ; | |
228 while ($statusPostLine != 200 ) { | |
229 | |
230 # Fix a limit at 3 tries... | |
231 if ($top < 4) { | |
232 print "\tTesting HMDB server connexion ($top time(s) )...\n" ; | |
233 $mech->post( | |
234 "http://specdb.wishartlab.com/ms/search.csv", | |
235 Content => 'utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV' | |
236 ); | |
237 | |
238 # print Dumper $mech ; | |
239 $statusPostLine = $mech->status() ; | |
240 } | |
241 else { | |
242 last ; | |
243 } | |
244 $top++ ; | |
245 }## End While | |
246 | |
247 return (\$statusPostLine) ; | |
248 } | |
249 ## END of SUB | |
250 | |
251 | |
252 | |
179 | 253 |
180 =head2 METHOD check_state_from_hmdb_ua | 254 =head2 METHOD check_state_from_hmdb_ua |
181 | 255 |
182 ## Description : check the thhp status of hmdb and kill correctly the script if necessary. | 256 ## Description : check the thhp status of hmdb and kill correctly the script if necessary. |
183 ## Input : $status | 257 ## Input : $status |
194 if (!defined $$status) { | 268 if (!defined $$status) { |
195 croak "No http status is defined for the distant server" ; | 269 croak "No http status is defined for the distant server" ; |
196 } | 270 } |
197 else { | 271 else { |
198 unless ( $$status == 200 ) { | 272 unless ( $$status == 200 ) { |
199 if ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; } | 273 if ( $$status == 502 ) { croak "Bad Gateway (502): The HMDB server, while acting as a gateway or proxy, received an invalid response from the upstream server. The Hmdb tool is stopped with error." ; } |
200 if ( $$status == 500 ) { croak "Internal Server Error: The HMDB server returns an unexpected internal server error" ; } | 274 if ( $$status == 504 ) { croak "Gateway Timeout (504): The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server. The Hmdb tool is stopped with error." ; } |
275 if ( $$status == 500 ) { croak "Internal Server Error (500): The HMDB server returns an unexpected internal server error. The Hmdb tool is stopped with error." ; } | |
201 else { | 276 else { |
202 ## None supported http code error ## | 277 ## None supported http code error ## |
203 croak "Internal Server Error $$status..." ; | 278 croak "Internal Server Error $$status..." ; |
204 } | 279 } |
205 } | 280 } |
208 | 283 |
209 return (1) ; | 284 return (1) ; |
210 } | 285 } |
211 ## END of SUB | 286 ## END of SUB |
212 | 287 |
213 =head2 METHOD get_matches_from_hmdb_ua | 288 =head2 METHOD get_matches_from_hmdb_ua DEPRECATED |
214 | 289 |
215 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) | 290 ## Description : [DEPRECATED]HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) |
216 ## Input : $mass, $delta, $mode | 291 ## Input : $mass, $delta, $mode |
217 ## Output : $results | 292 ## Output : $results |
218 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; | 293 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ; |
219 | 294 |
220 =cut | 295 =cut |
225 my ( $masses, $delta, $mode ) = @_ ; | 300 my ( $masses, $delta, $mode ) = @_ ; |
226 | 301 |
227 my @page = () ; | 302 my @page = () ; |
228 | 303 |
229 my $ua = LWP::UserAgent->new( keep_alive => 10 ); | 304 my $ua = LWP::UserAgent->new( keep_alive => 10 ); |
230 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"); | 305 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); |
231 $ua->timeout(2400) ; | 306 $ua->timeout(2400) ; |
307 | |
308 # Cookies | |
309 # my $cookie = new HTTP::Cookies( ignore_discard => 1 ); | |
310 # $ua->cookie_jar( $cookie ); | |
232 | 311 |
233 my $req = HTTP::Request->new( | 312 # my $req = HTTP::Request->new( |
234 POST => 'http://specdb.wishartlab.com/ms/search.csv'); | 313 # POST => 'http://specdb.wishartlab.com/ms/search.csv'); |
235 | 314 |
236 $req->content_type('application/x-www-form-urlencoded'); | 315 my $url = 'http://specdb.wishartlab.com/ms/search.csv'; |
316 my $header = ['Content-Type' => 'application/x-www-form-urlencoded', 'Referer' => 'http://www.hmdb.ca/spectra/ms/search', 'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8']; | |
317 my $data = {mode => $mode, query_masses => $masses, 'tolerance' => $delta, database => 'HMDB', commit => 'Download Results As CSV'}; | |
318 my $encoded_data = encode_utf8($data); | |
319 | |
320 my $req = HTTP::Request->new('POST', $url, $header, $encoded_data); | |
321 | |
322 # $req->content_type('application/x-www-form-urlencoded'); | |
237 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); | 323 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV'); |
238 # print Dumper $req ; | 324 # print Dumper $req ; |
239 my $res = $ua->request($req); | 325 my $res = $ua->request($req); |
240 my $status_line = undef ; | 326 my $status_line = undef ; |
241 # print $res->as_string; | 327 print $res->as_string; |
242 if ($res->is_success) { | 328 if ($res->is_success) { |
243 @page = split ( /\n/, $res->decoded_content ) ; | 329 @page = split ( /\n/, $res->decoded_content ) ; |
244 $status_line = 'OK' ; | 330 $status_line = 'OK' ; |
245 } else { | 331 } else { |
246 $status_line = $res->status_line ; | 332 $status_line = $res->status_line ; |
250 | 336 |
251 | 337 |
252 return (\@page, $status_line) ; | 338 return (\@page, $status_line) ; |
253 } | 339 } |
254 ## END of SUB | 340 ## END of SUB |
341 | |
342 =head2 METHOD getMatchesFromHmdbWithUA | |
343 | |
344 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg) | |
345 ## Input : $mass, $delta, $mode | |
346 ## Output : $results | |
347 ## Usage : my ( $results ) = getMatchesFromHmdbWithUA( $mass, $delta, $mode ) ; | |
348 | |
349 =cut | |
350 ## START of SUB | |
351 sub getMatchesFromHmdbWithUA { | |
352 ## Retrieve Values | |
353 my $self = shift ; | |
354 my ( $masses, $delta, $mode ) = @_ ; | |
355 | |
356 my @page = () ; | |
357 | |
358 #based on https://stackoverflow.com/questions/17732916/perl-post-automation-and | |
359 | |
360 my $mech = WWW::Mechanize->new( | |
361 agent => 'wonderbot for W4M 1.01', | |
362 autocheck => 1, | |
363 timeout => 2400, | |
364 ); | |
365 | |
366 my $statusGetLine = 0 ; | |
367 my $statusPostLine = 0 ; | |
368 | |
369 #receiving cookies and authentication token (CFRS) | |
370 my $reqInit = $mech->get("http://www.hmdb.ca/spectra/ms/search"); | |
371 $statusGetLine = $mech->status() ; | |
372 | |
373 if ($statusGetLine == 200 ) { | |
374 die 'no CSRF_REQUEST_TOKEN_VALUE in page found' | |
375 unless ($reqInit->decoded_content =~ /\"csrf-token\"\s+content=\"(.*)\"/) ; | |
376 my $csrftoken = $1; | |
377 # print "\nTOKEN: $csrftoken\n" ; | |
378 $mech->add_header("X-CSRFToken", $csrftoken); | |
379 $mech->add_header('Connection', 'keep-alive'); | |
380 $mech->add_header('Content-Type', 'application/x-www-form-urlencoded'); | |
381 $mech->add_header('Referer', 'http://www.hmdb.ca/spectra/ms/search'); | |
382 $mech->add_header('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'); | |
383 } | |
384 | |
385 my $res = $mech->post( | |
386 "http://specdb.wishartlab.com/ms/search.csv", | |
387 Content => 'utf8=TRUE&mode=' | |
388 .$mode.'&query_masses=' | |
389 .$masses.'&tolerance=' | |
390 .$delta.'&database=HMDB&commit=Download Results As CSV' | |
391 ); | |
392 | |
393 if ($mech->success) { | |
394 @page = split ( /\n/, $res->decoded_content ) ; | |
395 $statusPostLine = 'OK' ; | |
396 } | |
397 else { | |
398 $statusPostLine = $mech->status() ; | |
399 warn "\t[HMDB service issue !! the server returned a $statusPostLine HTTP error]" ; | |
400 } | |
401 | |
402 # print Dumper $res->decoded_content ; | |
403 | |
404 return (\@page, $statusPostLine) ; | |
405 } | |
406 ## END of SUB | |
407 | |
255 | 408 |
256 =head2 METHOD parse_hmdb_csv_results | 409 =head2 METHOD parse_hmdb_csv_results |
257 | 410 |
258 ## Description : parse the csv results and get data | 411 ## Description : parse the csv results and get data |
259 ## Input : $csv | 412 ## Input : $csv |