comparison lib/hmdb.pm @ 1:6d0a0f8f672a draft

planemo upload commit f67323ae4fa7fdbd9f4518ede105a7d7cd44b471
author fgiacomoni
date Wed, 23 Nov 2016 09:31:50 -0500
parents
children 6091a80df951
comparison
equal deleted inserted replaced
0:9583f9772198 1:6d0a0f8f672a
1 package lib::hmdb ;
2
3 use strict;
4 use warnings ;
5 use Exporter ;
6 use Carp ;
7
8 use LWP::Simple;
9 use LWP::UserAgent;
10 use URI::URL;
11 use SOAP::Lite;
12 use Encode;
13 use HTML::Template ;
14 use XML::Twig ;
15 use Text::CSV ;
16
17 use Data::Dumper ;
18
19 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
20
21 our $VERSION = "1.0";
22 our @ISA = qw(Exporter);
23 our @EXPORT = qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass );
24 our %EXPORT_TAGS = ( ALL => [qw( map_suppl_data_on_hmdb_results get_unik_ids_from_results get_hmdb_metabocard_from_id extract_sub_mz_lists test_matches_from_hmdb_ua prepare_multi_masses_query get_matches_from_hmdb_ua parse_hmdb_csv_results set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object write_html_skel set_lm_matrix_object set_hmdb_matrix_object_with_ids add_lm_matrix_to_input_matrix write_csv_skel write_csv_one_mass )] );
25
26 =head1 NAME
27
28 My::Module - An example module
29
30 =head1 SYNOPSIS
31
32 use My::Module;
33 my $object = My::Module->new();
34 print $object->as_string;
35
36 =head1 DESCRIPTION
37
38 This module does not really exist, it
39 was made for the sole purpose of
40 demonstrating how POD works.
41
42 =head1 METHODS
43
44 Methods are :
45
46 =head2 METHOD new
47
48 ## Description : new
49 ## Input : $self
50 ## Ouput : bless $self ;
51 ## Usage : new() ;
52
53 =cut
54
55 sub new {
56 ## Variables
57 my $self={};
58 bless($self) ;
59 return $self ;
60 }
61 ### END of SUB
62
63
64 =head2 METHOD extract_sub_mz_lists
65
66 ## Description : extract a couples of sublist from a long mz list (more than $HMDB_LIMITS)
67 ## Input : $HMDB_LIMITS, $masses
68 ## Output : $sublists
69 ## Usage : my ( $sublists ) = extract_sub_mz_lists( $HMDB_LIMITS, $masses ) ;
70
71 =cut
72 ## START of SUB
73 sub extract_sub_mz_lists {
74 ## Retrieve Values
75 my $self = shift ;
76 my ( $masses, $HMDB_LIMITS ) = @_ ;
77
78 my ( @sublists, @sublist ) = ( (), () ) ;
79 my $nb_mz = 0 ;
80 my $nb_total_mzs = scalar(@{$masses}) ;
81
82 if ($nb_total_mzs == 0) {
83 die "The provided mzs list is empty" ;
84 }
85
86 for ( my $current_pos = 0 ; $current_pos < $nb_total_mzs ; $current_pos++ ) {
87
88 if ( $nb_mz < $HMDB_LIMITS ) {
89 if ( $masses->[$current_pos] ) { push (@sublist, $masses->[$current_pos]) ; $nb_mz++ ; } # build sub list
90 }
91 elsif ( $nb_mz == $HMDB_LIMITS ) {
92 my @tmp = @sublist ; push (@sublists, \@tmp) ; @sublist = () ; $nb_mz = 0 ;
93 $current_pos-- ;
94 }
95 if ($current_pos == $nb_total_mzs-1) { my @tmp = @sublist ; push (@sublists, \@tmp) ; }
96 }
97 return(\@sublists) ;
98 }
99 ## END of SUB
100
101 =head2 METHOD prepare_multi_masses_query
102
103 ## Description : Generate the adapted format of the mz list for HMDB
104 ## Input : $masses
105 ## Output : $hmdb_masses
106 ## Usage : my ( $hmdb_masses ) = prepare_multi_masses_query( $masses ) ;
107
108 =cut
109 ## START of SUB
110 sub prepare_multi_masses_query {
111 ## Retrieve Values
112 my $self = shift ;
113 my ( $masses ) = @_ ;
114
115 my $hmdb_masses = undef ;
116 my $sep = '%0D%0A' ; ## retour chariot encode
117 my ($nb_masses, $i) = (0, 0) ;
118
119 if ( defined $masses ) {
120 my @masses = @{$masses} ;
121 my $nb_masses = scalar ( @masses ) ;
122 if ( $nb_masses == 0 ) { croak "The input method parameter mass list is empty" ; }
123 elsif ( $nb_masses >= 150 ) { croak "Your mass list is too long : HMDB allows maximum 150 query masses per request \n" ; } ## Del it --- temporary patch
124
125 foreach my $mass (@masses) {
126
127 if ($i < $nb_masses) {
128 $hmdb_masses .= $mass.$sep ;
129 }
130 elsif ( $i == $nb_masses ) {
131 $hmdb_masses .= $mass ;
132 }
133 else {
134 last ;
135 }
136 $i ++ ;
137 }
138 }
139 else {
140 croak "No mass list found \n" ;
141 }
142 return($hmdb_masses, $nb_masses) ;
143 }
144 ## END of SUB
145
146 =head2 METHOD test_matches_from_hmdb_ua
147
148 ## Description : test a single query with tests parameters on hmdb - get the status of the complete server infra.
149 ## Input : none
150 ## Output : $status_line
151 ## Usage : my ( $status_line ) = test_matches_from_hmdb_ua( ) ;
152
153 =cut
154 ## START of SUB
155 sub test_matches_from_hmdb_ua {
156 ## Retrieve Values
157 my $self = shift ;
158
159 my @page = () ;
160
161 my $ua = new LWP::UserAgent;
162 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
163
164 my $req = HTTP::Request->new(
165 POST => 'http://specdb.wishartlab.com/ms/search.csv');
166
167 $req->content_type('application/x-www-form-urlencoded');
168 $req->content('utf8=TRUE&mode=positive&query_masses=420.159317&tolerance=0.000001&database=HMDB&commit=Download Results As CSV');
169
170 my $res = $ua->request($req);
171 # print $res->as_string;
172 my $status_line = $res->status_line ;
173 ($status_line) = ($status_line =~ /(\d+)/);
174
175
176 return (\$status_line) ;
177 }
178 ## END of SUB
179
180 =head2 METHOD check_state_from_hmdb_ua
181
182 ## Description : check the thhp status of hmdb and kill correctly the script if necessary.
183 ## Input : $status
184 ## Output : none
185 ## Usage : check_state_from_hmdb_ua($status) ;
186
187 =cut
188 ## START of SUB
189 sub check_state_from_hmdb_ua {
190 ## Retrieve Values
191 my $self = shift ;
192 my ($status) = @_ ;
193
194 if (!defined $$status) {
195 croak "No http status is defined for the distant server" ;
196 }
197 else {
198 unless ( $$status == 200 ) {
199 if ( $$status == 504 ) { croak "Gateway Timeout: The HMDB server was acting as a gateway or proxy and did not receive a timely response from the upstream server" ; }
200 else {
201 ## None supported http code error ##
202 }
203 }
204 }
205
206 return (1) ;
207 }
208 ## END of SUB
209
210 =head2 METHOD get_matches_from_hmdb_ua
211
212 ## Description : HMDB querying via an user agent with parameters : mz, delta and molecular species (neutral, pos, neg)
213 ## Input : $mass, $delta, $mode
214 ## Output : $results
215 ## Usage : my ( $results ) = get_matches_from_hmdb( $mass, $delta, $mode ) ;
216
217 =cut
218 ## START of SUB
219 sub get_matches_from_hmdb_ua {
220 ## Retrieve Values
221 my $self = shift ;
222 my ( $masses, $delta, $mode ) = @_ ;
223
224 my @page = () ;
225
226 my $ua = new LWP::UserAgent;
227 $ua->agent("Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36");
228 $ua->timeout(500);
229
230 my $req = HTTP::Request->new(
231 POST => 'http://specdb.wishartlab.com/ms/search.csv');
232
233 $req->content_type('application/x-www-form-urlencoded');
234 $req->content('utf8=TRUE&mode='.$mode.'&query_masses='.$masses.'&tolerance='.$delta.'&database=HMDB&commit=Download Results As CSV');
235
236 my $res = $ua->request($req);
237 # print $res->as_string;
238 if ($res->is_success) {
239 @page = split ( /\n/, $res->decoded_content ) ;
240 } else {
241 my $status_line = $res->status_line ;
242 ($status_line) = ($status_line =~ /(\d+)/);
243 croak "HMDB service none available !! Status of the HMDB server is : $status_line\n" ;
244 }
245
246
247 return (\@page) ;
248 }
249 ## END of SUB
250
251 =head2 METHOD parse_hmdb_csv_results
252
253 ## Description : parse the csv results and get data
254 ## Input : $csv
255 ## Output : $results
256 ## Usage : my ( $results ) = parse_hmdb_csv_results( $csv ) ;
257
258 =cut
259 ## START of SUB
260 sub parse_hmdb_csv_results {
261 ## Retrieve Values
262 my $self = shift ;
263 my ( $csv, $masses, $max_query ) = @_ ;
264
265 my $test = 0 ;
266 my ($query_mass,$compound_id,$formula,$compound_mass,$adduct,$adduct_type,$adduct_mass,$delta) = (0, undef, undef, undef, undef, undef, undef, undef) ;
267
268 my %result_by_entry = () ;
269 my %features = () ;
270
271 # print Dumper $csv ;
272
273 foreach my $line (@{$csv}) {
274
275 if ($line !~ /query_mass,compound_id,formula,compound_mass,adduct,adduct_type,adduct_mass,delta/) {
276 my @entry = split(/,/, $line) ;
277
278 if ( !exists $result_by_entry{$entry[0]} ) { $result_by_entry{$entry[0]} = [] ; }
279
280 $features{ENTRY_ENTRY_ID} = $entry[1] ;
281 $features{ENTRY_FORMULA} = $entry[2] ;
282 $features{ENTRY_CPD_MZ} = $entry[3] ;
283 $features{ENTRY_ADDUCT} = $entry[4] ;
284 $features{ENTRY_ADDUCT_TYPE} = $entry[5] ;
285 $features{ENTRY_ADDUCT_MZ} = $entry[6] ;
286 $features{ENTRY_DELTA} = $entry[7] ;
287
288 my %temp = %features ;
289
290 push (@{$result_by_entry{$entry[0]} }, \%temp) ;
291 }
292 else {
293 next ;
294 }
295 } ## end foreach
296
297 ## manage per query_mzs (keep query masses order by array)
298 my @results = () ;
299 foreach (@{$masses}) {
300 if ($result_by_entry{$_}) {
301
302 ## cut all entries > $max_query
303 my @temp_entries = @{$result_by_entry{$_}} ;
304 my @temp_cut = () ;
305 my $current_query = 0 ;
306 foreach (@temp_entries) {
307 $current_query ++ ;
308 if ($current_query > $max_query) {
309 last ;
310 }
311 else {
312 push (@temp_cut, $_) ;
313 }
314 }
315 push (@results, \@temp_cut) ;
316 # push (@results, $result_by_entry{$_}) ;
317 }
318 else {push (@results, [] ) ;} ;
319
320 }
321 return(\@results) ;
322 }
323 ## END of SUB
324
325 =head2 METHOD parse_hmdb_page_results
326
327 ## Description : [DEPRECATED] old HMDB html page parser
328 ## Input : $page
329 ## Output : $results
330 ## Usage : my ( $results ) = parse_hmdb_page_result( $pages ) ;
331
332 =cut
333 ## START of SUB
334 sub parse_hmdb_page_results {
335 ## Retrieve Values
336 my $self = shift ;
337 my ( $page ) = @_ ;
338
339 my @results = () ;
340 my ($catch_table, $catch_name) = (0, 0) ;
341 my ($name, $adduct, $adduct_mw, $cpd_mw, $delta) = (undef, undef, undef, undef, undef) ;
342
343 if ( defined $page ) {
344
345 my @page = @{$page} ;
346 my $ID = undef ;
347 my @result_by_mz = () ;
348 my %result_by_entry = () ;
349
350 foreach my $line (@page) {
351
352 #Section de la page contenant les resultat
353 if( $line =~/<table>/ ) { $catch_table = 1 ; }
354
355 ## Si il existe un resultat :
356 if($catch_table == 1) {
357
358 #Id de la molecule, et creation du lien
359 if( $line =~ /<a href=\"\/metabolites\/(\w+)\" (.*)>/ ) {
360 $ID = $1 ;
361 $catch_name = 0 ;
362 next ;
363 }
364 #Nom de la molecule ONLY!!
365 if ( $catch_name == 0 ) {
366
367 if( $line =~ /<td>(.+)<\/td>/ ) {
368
369 if ( !defined $name ) {
370 $name = $1 ;
371 $result_by_entry{'ENTRY_ENTRY_ID'} = $ID ;
372 $result_by_entry{'ENTRY_NAME'} = $name ;
373 next ;
374 }
375 if ( !defined $adduct ) { $adduct = $1 ; $result_by_entry{'ENTRY_ADDUCT'} = $adduct ; next ; }
376 if ( !defined $adduct_mw ) { $adduct_mw = $1 ; $result_by_entry{'ENTRY_ADDUCT_MZ'} = $adduct_mw ; next ; }
377 if ( !defined $cpd_mw ) { $cpd_mw = $1 ; $result_by_entry{'ENTRY_CPD_MZ'} = $cpd_mw ; next ; }
378 if ( !defined $delta ) {
379 $delta = $1 ;
380 $result_by_entry{'ENTRY_DELTA'} = $delta ;
381 $catch_name = 1 ;
382 my %tmp = %result_by_entry ;
383 push (@result_by_mz, \%tmp) ;
384 %result_by_entry = () ;
385 ( $name, $cpd_mw, $delta, $adduct, $adduct_mw ) = ( undef, undef, undef, undef, undef ) ;
386 next ;
387 }
388 }
389 }
390 }
391 #Fin de la section contenant les resultats
392 if( $line =~ /<\/table>/ ) {
393 $catch_table = 0 ;
394 my @Tmp = @result_by_mz ;
395 push(@results, \@Tmp) ;
396 @result_by_mz = () ;
397 }
398 }
399 }
400 return(\@results) ;
401 }
402 ## END of SUB
403
404
405 =head2 METHOD get_unik_ids_from_results
406
407 ## Description : get all unik ids from the hmdb result object
408 ## Input : $results
409 ## Output : $ids
410 ## Usage : my ( $ids ) = get_unik_ids_from_results ( $results ) ;
411
412 =cut
413 ## START of SUB
414 sub get_unik_ids_from_results {
415 ## Retrieve Values
416 my $self = shift ;
417 my ( $results ) = @_;
418 my ( %ids ) = ( () ) ;
419
420 foreach my $result (@{$results}) {
421
422 foreach my $entries (@{$result}) {
423
424 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) {
425 $ids{$entries->{'ENTRY_ENTRY_ID'}} = 1 ;
426 }
427 }
428 }
429
430 return (\%ids) ;
431 }
432 ### END of SUB
433
434
435
436 =head2 METHOD get_hmdb_metabocard_from_id
437
438 ## Description : get a metabocard (xml format from an ID on HMDB)
439 ## Input : $ids
440 ## Output : $metabocard_features
441 ## Usage : my ( $metabocard_features ) = get_hmdb_metabocard_from_id ( $ids ) ;
442
443 =cut
444 ## START of SUB
445 sub get_hmdb_metabocard_from_id {
446 ## Retrieve Values
447 my $self = shift ;
448 my ( $ids, $hmdb_url ) = @_;
449 my ( %metabocard_features ) = ( () ) ;
450 my $query = undef ;
451
452 ## structure %metabocard_features
453 # metabolite_id = (
454 # 'metabolite_name' => '__name__',
455 # 'metabolite_inchi' => '__inchi__',
456 # 'metabolite_logp' => '__logp-ALOGPS__',
457 #
458 # )
459
460
461 if( (defined $ids) and ($ids > 0 ) ) {
462
463 foreach my $id (keys %{$ids}) {
464
465 # print "\n============== > $id **********************\n " ;
466 my $twig = undef ;
467
468 if (defined $hmdb_url) {
469 $query = $hmdb_url.$id.'.xml' ;
470
471 ## test the header if exists
472 my $response = head($query) ;
473
474 if (!defined $response) {
475 $metabocard_features{$id}{'metabolite_name'} = undef ;
476 $metabocard_features{$id}{'metabolite_inchi'} = undef ;
477 $metabocard_features{$id}{'metabolite_logp'} = undef ;
478 ## Need to be improve to manage http 404 or other response diff than 200
479 }
480 elsif ($response->is_success) {
481
482 $twig = XML::Twig->nparse_ppe(
483
484 twig_handlers => {
485 # metabolite name
486 'metabolite/name' => sub { $metabocard_features{$id}{'metabolite_name'} = $_ -> text_only ; } ,
487 # metabolite inchi
488 'metabolite/inchi' => sub { $metabocard_features{$id}{'metabolite_inchi'} = $_ -> text_only ; } ,
489 ## metabolite logP
490 'metabolite/predicted_properties/property' => sub {
491
492 my ($kind, $source, $value ) = ( undef, undef, undef ) ;
493
494 if (defined $_->children ) {
495 foreach my $field ($_->children) {
496 if ( $field->name eq 'kind') { $kind = $field->text ; }
497 elsif ( $field->name eq 'source') { $source = $field->text ; }
498 elsif ( $field->name eq 'value') { $value = $field->text ; }
499
500 if (defined $source ) {
501 if ( ( $kind eq 'logp' ) and ( $source eq 'ALOGPS' ) ) {
502 $metabocard_features{$id}{'metabolite_logp'} = $value ;
503 }
504 ($kind, $source, $value ) = ( undef, undef, undef ) ;
505 }
506 }
507 }
508 }
509 },
510 pretty_print => 'indented',
511 error_context => 1, $query
512 );
513
514 # $twig->print;
515 $twig->purge ;
516
517 if (!$@) {
518
519 }
520 else {
521 warn $@ ;
522 }
523 }
524 }
525 else {
526 warn "The hmdb metabocard url is not defined\n" ;
527 last;
528 }
529 }
530 }
531 else {
532 warn "The HMDB ids list from HMDB is empty - No metabocard found\n" ;
533 }
534
535 # print Dumper %metabocard_features ;
536 return (\%metabocard_features) ;
537 }
538 ### END of SUB
539
540
541 =head2 METHOD map_suppl_data_on_hmdb_results
542
543 ## Description : map supplementary data with already collected results with hmdb search
544 ## Input : $results, $features
545 ## Output : $results
546 ## Usage : my ( $results ) = map_suppl_data_on_hmdb_results ( $results, $features ) ;
547
548 =cut
549 ## START of SUB
550 sub map_suppl_data_on_hmdb_results {
551 ## Retrieve Values
552 my $self = shift ;
553 my ( $results, $features ) = @_;
554 my ( @more_results ) = ( () ) ;
555
556 @more_results = @{$results} ; ## Dump array ref to map
557
558 foreach my $result (@more_results) {
559
560 foreach my $entries (@{$result}) {
561
562 if ( ($entries->{'ENTRY_ENTRY_ID'}) and ($entries->{'ENTRY_ENTRY_ID'} ne '' ) ) {
563 ## check that we have a ID for mapping
564 my $current_id = $entries->{'ENTRY_ENTRY_ID'} ;
565 if ($features->{"$current_id"}) {
566 ## Metabolite NAME
567 if (defined $features->{"$current_id"}{'metabolite_name'} ) {
568 $entries->{'ENTRY_ENTRY_NAME'} = $features->{"$current_id"}{'metabolite_name'}
569 }
570 else {
571 $entries->{'ENTRY_ENTRY_NAME'} = 'UNKNOWN' ;
572 }
573 ## Metabolite INCHI
574 if (defined $features->{"$current_id"}{'metabolite_inchi'} ) {
575 $entries->{'ENTRY_ENTRY_INCHI'} = $features->{"$current_id"}{'metabolite_inchi'}
576 }
577 else {
578 $entries->{'ENTRY_ENTRY_INCHI'} = 'NA' ;
579 }
580 ## Metabolite LOGP
581 if (defined $features->{"$current_id"}{'metabolite_logp'} ) {
582 $entries->{'ENTRY_ENTRY_LOGP'} = $features->{"$current_id"}{'metabolite_logp'}
583 }
584 else {
585 $entries->{'ENTRY_ENTRY_LOGP'} = 'NA' ;
586 }
587 }
588 else {
589 warn "This HMDB id doesn't match any collected ids\n" ;
590 }
591 }
592 }
593 }
594
595 return (\@more_results) ;
596 }
597 ### END of SUB
598
599
600 =head2 METHOD set_html_tbody_object
601
602 ## Description : initializes and build the tbody object (perl array) needed to html template
603 ## Input : $nb_pages, $nb_items_per_page
604 ## Output : $tbody_object
605 ## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ;
606
607 =cut
608 ## START of SUB
609 sub set_html_tbody_object {
610 my $self = shift ;
611 my ( $nb_pages, $nb_items_per_page ) = @_ ;
612
613 my ( @tbody_object ) = ( ) ;
614
615 for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) {
616
617 my %pages = (
618 # tbody feature
619 PAGE_NB => $i,
620 MASSES => [], ## end MASSES
621 ) ; ## end TBODY N
622 push (@tbody_object, \%pages) ;
623 }
624 return(\@tbody_object) ;
625 }
626 ## END of SUB
627
628 =head2 METHOD add_mz_to_tbody_object
629
630 ## Description : initializes and build the mz object (perl array) needed to html template
631 ## Input : $tbody_object, $nb_items_per_page, $mz_list
632 ## Output : $tbody_object
633 ## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ;
634
635 =cut
636 ## START of SUB
637 sub add_mz_to_tbody_object {
638 my $self = shift ;
639 my ( $tbody_object, $nb_items_per_page, $mz_list, $ids_list ) = @_ ;
640
641 my ( $current_page, $mz_index ) = ( 0, 0 ) ;
642
643 foreach my $page ( @{$tbody_object} ) {
644
645 my @colors = ('white', 'green') ;
646 my ( $current_index, , $icolor ) = ( 0, 0 ) ;
647
648 for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) {
649 #
650 if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page
651 $current_index = 0 ;
652 last ; ##
653 }
654 else {
655 $current_index++ ;
656 if ( $icolor > 1 ) { $icolor = 0 ; }
657
658 if ( exists $mz_list->[$mz_index] ) {
659
660 my %mz = (
661 # mass feature
662 MASSES_ID_QUERY => $ids_list->[$mz_index],
663 MASSES_MZ_QUERY => $mz_list->[$mz_index],
664 MZ_COLOR => $colors[$icolor],
665 MASSES_NB => $mz_index+1,
666 ENTRIES => [] ,
667 ) ;
668 push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ;
669 # Html attr for mass
670 $icolor++ ;
671 }
672 }
673 $mz_index++ ;
674 } ## foreach mz
675
676 $current_page++ ;
677 }
678 return($tbody_object) ;
679 }
680 ## END of SUB
681
682 =head2 METHOD add_entries_to_tbody_object
683
684 ## Description : initializes and build the entries object (perl array) needed to html template
685 ## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries
686 ## Output : $tbody_object
687 ## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ;
688
689 =cut
690 ## START of SUB
691 sub add_entries_to_tbody_object {
692 ## Retrieve Values
693 my $self = shift ;
694 my ( $tbody_object, $nb_items_per_page, $mz_list, $entries ) = @_ ;
695
696 my $index_page = 0 ;
697 my $index_mz_continous = 0 ;
698
699 foreach my $page (@{$tbody_object}) {
700
701 my $index_mz = 0 ;
702
703 foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) {
704
705 my $index_entry = 0 ;
706
707 my @anti_redondant = ('N/A') ;
708 my $check_rebond = 0 ;
709 my $check_noentry = 0 ;
710
711 foreach my $entry (@{ $entries->[$index_mz_continous] }) {
712 $check_noentry ++ ;
713 ## dispo anti doublons des entries
714 foreach my $rebond (@anti_redondant) {
715 if ( $rebond eq $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
716 }
717
718 if ( $check_rebond == 0 ) {
719
720 push ( @anti_redondant, $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID} ) ;
721
722 my %entry = (
723 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR},
724 ENTRY_ENTRY_NAME => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_NAME},
725 ENTRY_ENTRY_ID => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
726 ENTRY_ENTRY_ID2 => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_ID},
727 ENTRY_FORMULA => $entries->[$index_mz_continous][$index_entry]{ENTRY_FORMULA},
728 ENTRY_CPD_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_CPD_MZ},
729 ENTRY_ADDUCT => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT},
730 ENTRY_ADDUCT_TYPE => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_TYPE},
731 ENTRY_ADDUCT_MZ => $entries->[$index_mz_continous][$index_entry]{ENTRY_ADDUCT_MZ},
732 ENTRY_DELTA => $entries->[$index_mz_continous][$index_entry]{ENTRY_DELTA},
733 ENTRY_ENTRY_INCHI => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_INCHI},
734 ENTRY_ENTRY_LOGP => $entries->[$index_mz_continous][$index_entry]{ENTRY_ENTRY_LOGP},
735 ) ;
736
737 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ;
738 }
739 $check_rebond = 0 ; ## reinit double control
740 $index_entry++ ;
741 } ## end foreach
742 if ($check_noentry == 0 ) {
743 my %entry = (
744 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR},
745 ENTRY_ENTRY_NAME => 'UNKNOWN',
746 ENTRY_ENTRY_ID => 'NONE',
747 ENTRY_ENTRY_ID2 => '',
748 ENTRY_FORMULA => 'n/a',
749 ENTRY_CPD_MZ => 'n/a',
750 ENTRY_ADDUCT => 'n/a',
751 ENTRY_ADDUCT_TYPE => 'n/a',
752 ENTRY_ADDUCT_MZ => 'n/a',
753 ENTRY_DELTA => 0,
754 ENTRY_ENTRY_INCHI => 'n/a',
755 ENTRY_ENTRY_LOGP => 'n/a',
756 ) ;
757 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ;
758 }
759 $index_mz ++ ;
760 $index_mz_continous ++ ;
761 }
762 $index_page++ ;
763 }
764 return($tbody_object) ;
765 }
766 ## END of SUB
767
768 =head2 METHOD write_html_skel
769
770 ## Description : prepare and write the html output file
771 ## Input : $html_file_name, $html_object, $html_template
772 ## Output : $html_file_name
773 ## Usage : my ( $html_file_name ) = write_html_skel( $html_file_name, $html_object ) ;
774
775 =cut
776 ## START of SUB
777 sub write_html_skel {
778 ## Retrieve Values
779 my $self = shift ;
780 my ( $html_file_name, $html_object, $pages , $search_condition, $html_template, $js_path, $css_path ) = @_ ;
781
782 my $html_file = $$html_file_name ;
783
784 if ( defined $html_file ) {
785 open ( HTML, ">$html_file" ) or die "Can't create the output file $html_file " ;
786
787 if (-e $html_template) {
788 my $ohtml = HTML::Template->new(filename => $html_template);
789 $ohtml->param( JS_GALAXY_PATH => $js_path, CSS_GALAXY_PATH => $css_path ) ;
790 $ohtml->param( CONDITIONS => $search_condition ) ;
791 $ohtml->param( PAGES_NB => $pages ) ;
792 $ohtml->param( PAGES => $html_object ) ;
793 print HTML $ohtml->output ;
794 }
795 else {
796 croak "Can't fill any html output : No template available ($html_template)\n" ;
797 }
798
799 close (HTML) ;
800 }
801 else {
802 croak "No output file name available to write HTML file\n" ;
803 }
804 return(\$html_file) ;
805 }
806 ## END of SUB
807
808 =head2 METHOD set_lm_matrix_object
809
810 ## Description : build the hmdb_row under its ref form
811 ## Input : $header, $init_mzs, $entries
812 ## Output : $hmdb_matrix
813 ## Usage : my ( $hmdb_matrix ) = set_lm_matrix_object( $header, $init_mzs, $entries ) ;
814
815 =cut
816 ## START of SUB
817 sub set_lm_matrix_object {
818 ## Retrieve Values
819 my $self = shift ;
820 my ( $header, $init_mzs, $entries ) = @_ ;
821
822 my @hmdb_matrix = () ;
823
824 if ( defined $header ) {
825 my @headers = () ;
826 push @headers, $header ;
827 push @hmdb_matrix, \@headers ;
828 }
829
830 my $index_mz = 0 ;
831
832 foreach my $mz ( @{$init_mzs} ) {
833
834 my $index_entries = 0 ;
835 my @clusters = () ;
836 my $cluster_col = undef ;
837
838 my @anti_redondant = ('N/A') ;
839 my $check_rebond = 0 ;
840
841 my $nb_entries = scalar (@{ $entries->[$index_mz] }) ;
842
843 foreach my $entry (@{ $entries->[$index_mz] }) {
844
845 ## dispo anti doublons des entries
846 foreach my $rebond (@anti_redondant) {
847 if ( $rebond eq $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
848 }
849
850 if ( $check_rebond == 0 ) {
851
852 push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
853
854 my $delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ;
855 my $formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ;
856 my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ;
857
858 ## METLIN data display model
859 ## entry1=VAR1::VAR2::VAR3::VAR4|entry2=VAR1::VAR2::VAR3::VAR4|...
860 # manage final pipe
861 if ($index_entries < $nb_entries-1 ) { $cluster_col .= $delta.'::('.$formula.')::'.$hmdb_id.'|' ; }
862 else { $cluster_col .= $delta.'::('.$formula.')::'.$hmdb_id ; }
863
864 }
865 $check_rebond = 0 ; ## reinit double control
866 $index_entries++ ;
867 } ## end foreach
868 if ( !defined $cluster_col ) { $cluster_col = 'NONE' ; }
869 push (@clusters, $cluster_col) ;
870 push (@hmdb_matrix, \@clusters) ;
871 $index_mz++ ;
872 }
873 return(\@hmdb_matrix) ;
874 }
875 ## END of SUB
876
877 =head2 METHOD set_hmdb_matrix_object_with_ids
878
879 ## Description : build the hmdb_row under its ref form (IDS only)
880 ## Input : $header, $init_mzs, $entries
881 ## Output : $hmdb_matrix
882 ## Usage : my ( $hmdb_matrix ) = set_hmdb_matrix_object_with_ids( $header, $init_mzs, $entries ) ;
883
884 =cut
885 ## START of SUB
886 sub set_hmdb_matrix_object_with_ids {
887 ## Retrieve Values
888 my $self = shift ;
889 my ( $header, $init_mzs, $entries ) = @_ ;
890
891 my @hmdb_matrix = () ;
892
893 if ( defined $header ) {
894 my @headers = () ;
895
896 ## redefined the header hmdb(delta::name::mz::formula::adduct::id)
897 $header = 'hmdb(delta::name::mz::formula::adduct::id)' ;
898 push @headers, $header ;
899 push @hmdb_matrix, \@headers ;
900 }
901
902 my $index_mz = 0 ;
903
904 foreach my $mz ( @{$init_mzs} ) {
905
906 my $index_entries = 0 ;
907 my @clusters = () ;
908 my $cluster_col = undef ;
909
910 my @anti_redondant = ('N/A') ;
911 my $check_rebond = 0 ;
912
913 my $nb_entries = scalar (@{ $entries->[$index_mz] }) ;
914
915 foreach my $entry (@{ $entries->[$index_mz] }) {
916
917 ## dispo anti doublons des entries
918 foreach my $rebond (@anti_redondant) {
919 if ( $rebond eq $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
920 }
921
922 if ( $check_rebond == 0 ) {
923
924 push ( @anti_redondant, $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ) ;
925 ##
926 my $hmdb_name = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_NAME} ;
927 my $hmdb_id = $entries->[$index_mz][$index_entries]{ENTRY_ENTRY_ID} ;
928 my $hmdb_formula = $entries->[$index_mz][$index_entries]{ENTRY_FORMULA} ;
929 my $hmdb_cpd_mz = $entries->[$index_mz][$index_entries]{ENTRY_CPD_MZ} ;
930 my $hmdb_adduct = $entries->[$index_mz][$index_entries]{ENTRY_ADDUCT} ;
931 my $hmdb_delta = $entries->[$index_mz][$index_entries]{ENTRY_DELTA} ;
932
933 ## METLIN data display model
934 ## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|...
935 my $entry = $hmdb_delta.'::['."$hmdb_name".']::'.$hmdb_cpd_mz.'::'.$hmdb_formula.'::['.$hmdb_adduct.']::'.$hmdb_id ;
936
937 # manage final pipe
938 if ($index_entries < $nb_entries-1 ) { $cluster_col .= $entry.' | ' ; }
939 else { $cluster_col .= $entry ; }
940
941 }
942 $check_rebond = 0 ; ## reinit double control
943 $index_entries++ ;
944 } ## end foreach
945 if ( !defined $cluster_col ) { $cluster_col = 'NONE' ; }
946 push (@clusters, $cluster_col) ;
947 push (@hmdb_matrix, \@clusters) ;
948 $index_mz++ ;
949 }
950 return(\@hmdb_matrix) ;
951 }
952 ## END of SUB
953
954 =head2 METHOD add_lm_matrix_to_input_matrix
955
956 ## Description : build a full matrix (input + lm column)
957 ## Input : $input_matrix_object, $lm_matrix_object, $nb_header
958 ## Output : $output_matrix_object
959 ## Usage : my ( $output_matrix_object ) = add_lm_matrix_to_input_matrix( $input_matrix_object, $lm_matrix_object, $nb_header ) ;
960
961 =cut
962 ## START of SUB
963 sub add_lm_matrix_to_input_matrix {
964 ## Retrieve Values
965 my $self = shift ;
966 my ( $input_matrix_object, $lm_matrix_object, $nb_header ) = @_ ;
967
968 my @output_matrix_object = () ;
969 my $index_row = 0 ;
970 my $line = 0 ;
971
972 foreach my $row ( @{$input_matrix_object} ) {
973 my @init_row = @{$row} ;
974 $line++;
975
976 if ( ( defined $nb_header ) and ( $line <= $nb_header) ) {
977 push (@output_matrix_object, \@init_row) ;
978 next ;
979 }
980
981 if ( $lm_matrix_object->[$index_row] ) {
982 my $dim = scalar(@{$lm_matrix_object->[$index_row]}) ;
983
984 if ($dim > 1) { warn "the add method can't manage more than one column\n" ;}
985 my $lm_col = $lm_matrix_object->[$index_row][$dim-1] ;
986
987 push (@init_row, $lm_col) ;
988 $index_row++ ;
989 }
990 push (@output_matrix_object, \@init_row) ;
991 }
992 return(\@output_matrix_object) ;
993 }
994 ## END of SUB
995
996 =head2 METHOD write_csv_skel
997
998 ## Description : prepare and write csv output file
999 ## Input : $csv_file, $rows
1000 ## Output : $csv_file
1001 ## Usage : my ( $csv_file ) = write_csv_skel( $csv_file, $rows ) ;
1002
1003 =cut
1004 ## START of SUB
1005 sub write_csv_skel {
1006 ## Retrieve Values
1007 my $self = shift ;
1008 my ( $csv_file, $rows ) = @_ ;
1009
1010 my $ocsv = lib::csv::new( {is_binary => 1 , quote_binary => 0, quote_char => undef }) ;
1011 my $csv = $ocsv->get_csv_object("\t") ;
1012 $ocsv->write_csv_from_arrays($csv, $$csv_file, $rows) ;
1013
1014 return($csv_file) ;
1015 }
1016 ## END of SUB
1017
1018 =head2 METHOD write_csv_one_mass
1019
1020 ## Description : print a cvs file
1021 ## Input : $masses, $ids, $results, $file
1022 ## Output : N/A
1023 ## Usage : write_csv_one_mass( $ids, $results, $file ) ;
1024
1025 =cut
1026 ## START of SUB
1027 sub write_csv_one_mass {
1028 ## Retrieve Values
1029 my $self = shift ;
1030 my ( $masses, $ids, $results, $file, ) = @_ ;
1031
1032 open(CSV, '>:utf8', "$file") or die "Cant' create the file $file\n" ;
1033 print CSV "ID\tQuery(Da)\tDelta\tMetabolite_Name\tCpd_MW(Da)\tFormula\tAdduct\tAdduct_MW(Da)\tHMDB_ID\n" ;
1034
1035 my $i = 0 ;
1036
1037 foreach my $id (@{$ids}) {
1038 my $mass = undef ;
1039 if ( $masses->[$i] ) { $mass = $masses->[$i] ; }
1040 else { last ; }
1041
1042 if ( $results->[$i] ) { ## an requested id has a result in the list of hashes $results.
1043
1044 my @anti_redondant = ('N/A') ;
1045 my $check_rebond = 0 ;
1046 my $check_noentry = 0 ;
1047
1048 foreach my $entry (@{$results->[$i]}) {
1049 $check_noentry ++ ;
1050 ## dispo anti doublons des entries
1051 foreach my $rebond (@anti_redondant) {
1052 if ( $rebond eq $entry->{ENTRY_ENTRY_ID} ) { $check_rebond = 1 ; last ; }
1053 }
1054 # print "\n-----------------------" ;
1055 # print Dumper $entry->{ENTRY_ENTRY_ID} ;
1056 # print "-------------------------$check_rebond\n" ;
1057 # print Dumper @anti_redondant ;
1058 if ( $check_rebond == 0 ) {
1059
1060 push ( @anti_redondant, $entry->{ENTRY_ENTRY_ID} ) ;
1061
1062 print CSV "$id\t$mass\t" ;
1063
1064 ## print delta
1065 if ( $entry->{ENTRY_DELTA} ) { print CSV "$entry->{ENTRY_DELTA}\t" ; }
1066 else { print CSV "0\t" ; }
1067
1068 ## print cpd name
1069 if ( $entry->{ENTRY_ENTRY_NAME} ) { print CSV "[$entry->{ENTRY_ENTRY_NAME}]\t" ; }
1070 else { print CSV "UNKNOWN\t" ; }
1071
1072 ## print cpd mz
1073 if ( $entry->{ENTRY_CPD_MZ} ) { print CSV "$entry->{ENTRY_CPD_MZ}\t" ; }
1074 else { print CSV "N/A\t" ; }
1075
1076 ## print cpd formula
1077 if ( $entry->{ENTRY_FORMULA} ) { print CSV "$entry->{ENTRY_FORMULA}\t" ; }
1078 else { print CSV "N/A\t" ; }
1079
1080 ## print adduct
1081 if ( $entry->{ENTRY_ADDUCT} ) { print CSV "[$entry->{ENTRY_ADDUCT}]\t" ; }
1082 else { print CSV "N/A\t" ; }
1083
1084 ## print adduct mz
1085 if ( $entry->{ENTRY_ADDUCT_MZ} ) { print CSV "$entry->{ENTRY_ADDUCT_MZ}\t" ; }
1086 else { print CSV "N/A\t" ; }
1087
1088 ## print cpd id
1089 if ( $entry->{ENTRY_ENTRY_ID} ) { print CSV "$entry->{ENTRY_ENTRY_ID}\n" ; }
1090 else { print CSV "N/A\n" ; }
1091 }
1092 $check_rebond = 0 ; ## reinit double control
1093 } ## end foreach
1094 if ($check_noentry == 0 ) {
1095 print CSV "$id\t$mass\t0\tUNKNOWN\tN/A\tN/A\tN/A\tN/A\tN/A\n" ;
1096 }
1097 }
1098 $i++ ;
1099 }
1100 close(CSV) ;
1101 return() ;
1102 }
1103 ## END of SUB
1104
1105 1 ;
1106
1107
1108 __END__
1109
1110 =head1 SUPPORT
1111
1112 You can find documentation for this module with the perldoc command.
1113
1114 perldoc hmdb.pm
1115
1116 =head1 Exports
1117
1118 =over 4
1119
1120 =item :ALL is ...
1121
1122 =back
1123
1124 =head1 AUTHOR
1125
1126 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt>
1127
1128 =head1 LICENSE
1129
1130 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
1131
1132 =head1 VERSION
1133
1134 version 1 : 06 / 06 / 2013
1135
1136 version 2 : 27 / 01 / 2014
1137
1138 version 3 : 19 / 11 / 2014
1139
1140 version 4 : 28 / 01 / 2016
1141
1142 version 5 : 02 / 11 /2016
1143
1144 =cut