Mercurial > repos > fgiacomoni > massbank_ws_searchspectrum
comparison lib/mapper.pm @ 0:023c380900ef draft default tip
Init repository with last massbank_ws_searchspectrum master version
| author | fgiacomoni |
|---|---|
| date | Wed, 19 Apr 2017 11:31:58 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:023c380900ef |
|---|---|
| 1 package lib::mapper ; | |
| 2 | |
| 3 use strict; | |
| 4 use warnings ; | |
| 5 use Exporter ; | |
| 6 use Carp ; | |
| 7 | |
| 8 use Data::Dumper ; | |
| 9 | |
| 10 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); | |
| 11 | |
| 12 our $VERSION = "1.0"; | |
| 13 our @ISA = qw(Exporter); | |
| 14 our @EXPORT = qw( add_min_max_for_pcgroup_res get_massbank_records_by_chunk compute_ids_from_pcgroups_res filter_pcgroup_res get_pcgroup_list get_pcgroups set_massbank_matrix_object add_massbank_matrix_to_input_matrix map_pc_to_generic_json set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object); | |
| 15 our %EXPORT_TAGS = ( ALL => [qw( add_min_max_for_pcgroup_res get_massbank_records_by_chunk compute_ids_from_pcgroups_res filter_pcgroup_res get_pcgroup_list get_pcgroups set_massbank_matrix_object add_massbank_matrix_to_input_matrix map_pc_to_generic_json set_html_tbody_object add_mz_to_tbody_object add_entries_to_tbody_object)] ); | |
| 16 | |
| 17 =head1 NAME | |
| 18 | |
| 19 My::Module - An example module | |
| 20 | |
| 21 =head1 SYNOPSIS | |
| 22 | |
| 23 use My::Module; | |
| 24 my $object = My::Module->new(); | |
| 25 print $object->as_string; | |
| 26 | |
| 27 =head1 DESCRIPTION | |
| 28 | |
| 29 This module does not really exist, it | |
| 30 was made for the sole purpose of | |
| 31 demonstrating how POD works. | |
| 32 | |
| 33 =head1 METHODS | |
| 34 | |
| 35 Methods are : | |
| 36 | |
| 37 =head2 METHOD new | |
| 38 | |
| 39 ## Description : new | |
| 40 ## Input : $self | |
| 41 ## Ouput : bless $self ; | |
| 42 ## Usage : new() ; | |
| 43 | |
| 44 =cut | |
| 45 | |
| 46 sub new { | |
| 47 ## Variables | |
| 48 my $self={}; | |
| 49 bless($self) ; | |
| 50 return $self ; | |
| 51 } | |
| 52 ### END of SUB | |
| 53 | |
| 54 =head2 METHOD get_pcgroups | |
| 55 | |
| 56 ## Description : get and prepare pcgroup features (mzs, into, names) from input cvs parser | |
| 57 ## Input : $pcs, $mzs, $ints, $names | |
| 58 ## Output : $pcgroups | |
| 59 ## Usage : my ( $pcgroups ) = get_pcgroups( $pcs, $mzs, $ints, $names ) ; | |
| 60 | |
| 61 =cut | |
| 62 ## START of SUB | |
| 63 sub get_pcgroups { | |
| 64 my $self = shift; | |
| 65 my ( $pcs, $mzs, $ints ) = @_; | |
| 66 | |
| 67 my %pcgroups = () ; | |
| 68 my $i = 0 ; | |
| 69 | |
| 70 ## Warn diff matrix dimension : | |
| 71 my $num_pcs = scalar(@{$pcs}) ; | |
| 72 my $num_mzs = scalar(@{$mzs}) ; | |
| 73 my $num_ints = scalar(@{$ints}) ; | |
| 74 | |
| 75 if ( ($num_pcs == $num_mzs ) and ( $num_mzs == $num_ints ) ) { | |
| 76 my @pcs = @{$pcs} ; | |
| 77 | |
| 78 foreach my $pc (@{$pcs}) { | |
| 79 | |
| 80 if ( ! $pcgroups{$pc} ) { $pcgroups{$pc}->{'id'} = $pc ; $pcgroups{$pc}->{'annotation'} = {} ; $pcgroups{$pc}->{'massbank_ids'} = [] ; } | |
| 81 | |
| 82 push (@{$pcgroups{$pc}->{'mzmed'}}, $mzs->[$i]) if ($mzs->[$i]) ; ## map mzs by pcgroup | |
| 83 | |
| 84 if ($ints->[$i] > 0 ) { push (@{$pcgroups{$pc}->{'into'}}, $ints->[$i]) ; ## map into by pcgroup | |
| 85 } | |
| 86 elsif ($ints->[$i] == 0) { | |
| 87 push (@{$pcgroups{$pc}->{'into'}}, $ints->[$i]) ; ## map into by pcgroup even value is 0 | |
| 88 } | |
| 89 else { | |
| 90 warn "Undefined value found in pcgroups array\n" ; | |
| 91 } | |
| 92 $i++ ; | |
| 93 } | |
| 94 } | |
| 95 else { | |
| 96 warn "The different ARRAYS (pcs, mzs, ints) doesn't have the same size : mapping is not possible \n!!" | |
| 97 } | |
| 98 return (\%pcgroups) ; | |
| 99 } | |
| 100 ### END of SUB | |
| 101 | |
| 102 =head2 METHOD get_pcgroup_list | |
| 103 | |
| 104 ## Description : get and prepare unik pcgroup list from input cvs parsed list | |
| 105 ## Input : $pcs | |
| 106 ## Output : $list | |
| 107 ## Usage : my ( $list ) = get_pcgroup_list( $pcs ) ; | |
| 108 | |
| 109 =cut | |
| 110 ## START of SUB | |
| 111 sub get_pcgroup_list { | |
| 112 my $self = shift; | |
| 113 my ( $pcs ) = @_; | |
| 114 | |
| 115 my @pcgroup_list = () ; | |
| 116 my $i = 0 ; | |
| 117 | |
| 118 my %hash = map { $_, 1 } @{$pcs} ; | |
| 119 @pcgroup_list = keys %hash; | |
| 120 @pcgroup_list = sort { $a <=> $b } @pcgroup_list ; | |
| 121 | |
| 122 return (\@pcgroup_list) ; | |
| 123 } | |
| 124 | |
| 125 ### END of SUB | |
| 126 | |
| 127 | |
| 128 =head2 METHOD filter_pcgroup_res | |
| 129 | |
| 130 ## Description : This method filter the results returned by massbank with a user defined score threshold | |
| 131 ## Input : $pcgroups, $threshold | |
| 132 ## Output : $pcgroups | |
| 133 ## Usage : my ( $pcgroups ) = filter_pcgroup_res ( $pcgroups, $threshold ) ; | |
| 134 | |
| 135 =cut | |
| 136 ## START of SUB | |
| 137 sub filter_pcgroup_res { | |
| 138 ## Retrieve Values | |
| 139 my $self = shift ; | |
| 140 my ( $pcgroups, $threshold ) = @_ ; | |
| 141 | |
| 142 my %temp = () ; | |
| 143 | |
| 144 if (!defined $threshold) { | |
| 145 $threshold = 0.5 ; ## default value | |
| 146 } | |
| 147 | |
| 148 if ( (defined $pcgroups) and (defined $threshold) ) { | |
| 149 %temp = %{$pcgroups} ; | |
| 150 | |
| 151 foreach my $pc (keys %temp) { | |
| 152 | |
| 153 if ( $temp{$pc}{'annotation'}{'num_res'} > 0 ) { | |
| 154 my @filtered_annot = reverse(grep { $_->{'score'} >= $threshold if ($_->{'score'}) } @{$temp{$pc}{'annotation'}{'res'}}) ; | |
| 155 my $new_num_res = scalar (@filtered_annot) ; | |
| 156 my @ids = () ; | |
| 157 foreach (@filtered_annot) { push (@ids, $_->{'id'} ) } | |
| 158 $temp{$pc}{'annotation'}{'res'} =\@filtered_annot ; | |
| 159 $temp{$pc}{'annotation'}{'num_res'} = $new_num_res ; | |
| 160 $temp{$pc}{'massbank_ids'} = \@ids ; | |
| 161 } | |
| 162 else { | |
| 163 warn "No result found for this pcgroup $pc\n" ; | |
| 164 } | |
| 165 } | |
| 166 } ## End IF | |
| 167 else { | |
| 168 warn "No pcgroup and threshold defined\n" ; | |
| 169 } | |
| 170 return (\%temp) ; | |
| 171 } | |
| 172 ### END of SUB | |
| 173 | |
| 174 =head2 METHOD add_min_max_for_pcgroup_res | |
| 175 | |
| 176 ## Description : This method add min / max value for each mzmed contained in pcgroup | |
| 177 ## Input : $pcgroups | |
| 178 ## Output : $pcgroups | |
| 179 ## Usage : my ( $pcgroups ) = add_min_max_for_pcgroup_res ( $pcgroups ) ; | |
| 180 | |
| 181 =cut | |
| 182 ## START of SUB | |
| 183 sub add_min_max_for_pcgroup_res { | |
| 184 ## Retrieve Values | |
| 185 my $self = shift ; | |
| 186 my ( $pcgroups, $delta ) = @_ ; | |
| 187 | |
| 188 my %temp = () ; | |
| 189 | |
| 190 if (!defined $delta) { | |
| 191 $delta = 0.01 ; ## default value | |
| 192 } | |
| 193 | |
| 194 if ( defined $pcgroups) { | |
| 195 %temp = %{$pcgroups} ; | |
| 196 | |
| 197 foreach my $pc (keys %temp) { | |
| 198 my %mz_intervales = () ; | |
| 199 if ( $temp{$pc}{'mzmed'} ) { | |
| 200 my @temp = @{$temp{$pc}{'mzmed'}} ; | |
| 201 foreach my $mz (@temp) { | |
| 202 my ($min, $max) = lib::mapper::new->min_and_max_from_double_with_delta($mz, 'Da', $delta); | |
| 203 $mz_intervales{$mz} = {'min' => $min, 'max' => $max } ; | |
| 204 } | |
| 205 } | |
| 206 else { | |
| 207 warn "No mzmed found for this pcgroup\n" ; | |
| 208 } | |
| 209 $temp{$pc}{'intervales'} = \%mz_intervales ; | |
| 210 | |
| 211 } | |
| 212 } ## End IF | |
| 213 else { | |
| 214 warn "No pcgroup and threshold defined\n" ; | |
| 215 } | |
| 216 return (\%temp) ; | |
| 217 } | |
| 218 ### END of SUB | |
| 219 | |
| 220 | |
| 221 | |
| 222 =head2 METHOD min_and_max_from_double_with_delta | |
| 223 | |
| 224 ## Description : returns the minimum and maximum double according to the delta | |
| 225 ## Input : \$double, \$delta_type, \$delta | |
| 226 ## Output : \$min, \$max | |
| 227 ## Usage : ($min, $max)= min_and_max_from_double_with_delta($double, $delta_type, $mz_delta) ; | |
| 228 | |
| 229 =cut | |
| 230 ## START of SUB | |
| 231 sub min_and_max_from_double_with_delta { | |
| 232 ## Retrieve Values | |
| 233 my $self = shift ; | |
| 234 my ( $double, $delta_type, $delta ) = @_ ; | |
| 235 my ( $min, $max ) = ( undef, undef ) ; | |
| 236 | |
| 237 if ($delta_type eq 'ppm'){ | |
| 238 $min = $double - ($delta * 10**-6 * $double); | |
| 239 $max = $double + ($delta * 10**-6 * $double) + 0.0000000001; ## it's to included the maximum value in the search | |
| 240 } | |
| 241 elsif ($delta_type eq 'Da'){ | |
| 242 $min = $double - $delta; | |
| 243 $max = $double + $delta + 0.0000000001; ## it's to included the maximum value in the search | |
| 244 } | |
| 245 else { croak "The double delta type '$delta_type' isn't a valid type !\n" ; } | |
| 246 | |
| 247 return($min, $max) ; | |
| 248 } | |
| 249 ## END of SUB | |
| 250 | |
| 251 | |
| 252 =head2 METHOD compute_ids_from_pcgroups_res | |
| 253 | |
| 254 ## Description : get all ids returned by massbank with sent queries and keep only unique ones. | |
| 255 ## Input : $pcgroups | |
| 256 ## Output : $unique_ids | |
| 257 ## Usage : my ( $unique_ids ) = compute_ids_from_pcgroups_res ( $pcgroups ) ; | |
| 258 | |
| 259 =cut | |
| 260 ## START of SUB | |
| 261 sub compute_ids_from_pcgroups_res { | |
| 262 ## Retrieve Values | |
| 263 my $self = shift ; | |
| 264 my ( $pcgroups ) = @_; | |
| 265 my ( @ids, @unique ) = ( (), () ) ; | |
| 266 | |
| 267 if ( defined $pcgroups ) { | |
| 268 | |
| 269 foreach my $pc ( keys %{$pcgroups} ) { | |
| 270 if ( $pcgroups->{$pc}{'massbank_ids'} ) { | |
| 271 push (@ids , @{ $pcgroups->{$pc}{'massbank_ids'} } ) ; | |
| 272 } | |
| 273 } | |
| 274 | |
| 275 if ( ( scalar (@ids) ) > 0 ) { | |
| 276 # print Dumper @ids ; | |
| 277 @unique = do { my %seen; grep { !$seen{$_}++ if (defined $_) } @ids }; | |
| 278 @unique = sort { $a cmp $b } @unique; | |
| 279 } | |
| 280 else { | |
| 281 @unique = () ; | |
| 282 } | |
| 283 | |
| 284 | |
| 285 } | |
| 286 return (\@unique) ; | |
| 287 } | |
| 288 ### END of SUB | |
| 289 | |
| 290 | |
| 291 =head2 METHOD get_massbank_records_by_chunk | |
| 292 | |
| 293 ## Description : get massbank records from a complete list but send queries chunk by chunk. | |
| 294 ## Input : $ids, $chunk_size | |
| 295 ## Output : $records | |
| 296 ## Usage : my ( $records ) = get_massbank_records_by_chunk ( $ids, $chunk_size ) ; | |
| 297 | |
| 298 =cut | |
| 299 ## START of SUB | |
| 300 sub get_massbank_records_by_chunk { | |
| 301 ## Retrieve Values | |
| 302 my $self = shift ; | |
| 303 my ( $server, $ids, $chunk_size ) = @_; | |
| 304 my ( @records, @sent_ids ) = ( (), () ) ; | |
| 305 | |
| 306 my $current = 0 ; | |
| 307 my $pos = 1 ; | |
| 308 my @temp_ids = () ; | |
| 309 | |
| 310 my $num_ids = scalar(@{$ids}) ; | |
| 311 # print "The number of given massbank ids is: $num_ids\n" ; | |
| 312 | |
| 313 foreach my $id (@{$ids}) { | |
| 314 $current++ ; | |
| 315 # print "$id - - $current/$num_ids) - - $pos \n" ; | |
| 316 | |
| 317 if ( ($current == $num_ids) or ($pos == $chunk_size) ) { | |
| 318 # print "Querying Massbank with...\n" ; | |
| 319 push (@temp_ids, $id) ; | |
| 320 ## send query | |
| 321 my $omassbank = lib::massbank_api->new() ; | |
| 322 my ($osoap) = $omassbank->selectMassBank($server) ; | |
| 323 my ($records) = $omassbank->getRecordInfo($osoap, \@temp_ids) ; | |
| 324 push (@records, @{$records}) ; | |
| 325 | |
| 326 @temp_ids = () ; | |
| 327 $pos = 0 ; | |
| 328 } | |
| 329 elsif ($pos < $chunk_size) { | |
| 330 # print "store...\n"; | |
| 331 push (@temp_ids, $id) ; | |
| 332 $pos ++ ; | |
| 333 } | |
| 334 else { | |
| 335 warn "Something goes wrong : out of range\n" | |
| 336 } | |
| 337 | |
| 338 | |
| 339 } | |
| 340 my $num_records = scalar(@records) ; | |
| 341 # print "The number of received massbank records is: $num_records\n" ; | |
| 342 return (\@records) ; | |
| 343 } | |
| 344 ### END of SUB | |
| 345 | |
| 346 =head2 METHOD set_massbank_matrix_object | |
| 347 | |
| 348 ## Description : build the massbank_row under its ref form | |
| 349 ## Input : $header, $init_pcs, $init_mzs, $pcgroups, $records | |
| 350 ## Output : $massbank_matrix | |
| 351 ## Usage : my ( $massbank_matrix ) = set_lm_matrix_object( $header, $init_pcs, $init_mzs, $pcgroups, $records ) ; | |
| 352 | |
| 353 =cut | |
| 354 ## START of SUB | |
| 355 sub set_massbank_matrix_object { | |
| 356 ## Retrieve Values | |
| 357 my $self = shift ; | |
| 358 my ( $header, $init_pcs, $init_mzs, $pcgroups, $records ) = @_ ; | |
| 359 my @massbank_matrix = () ; | |
| 360 | |
| 361 my $current_pos = 0 ; | |
| 362 | |
| 363 ## format massbank(score::name::mz::formula::adduct::id) | |
| 364 if ( defined $header ) { | |
| 365 $header .= '(score::name::mz::formula::adduct::id)' ; | |
| 366 my @headers = () ; | |
| 367 push @headers, $header ; | |
| 368 push @massbank_matrix, \@headers ; | |
| 369 } | |
| 370 | |
| 371 ## foreach mz of the input file | |
| 372 foreach my $mz (@{$init_mzs}) { | |
| 373 | |
| 374 my $nb_ids = 0 ; | |
| 375 my @ids = () ; | |
| 376 | |
| 377 my $pc = $init_pcs->[$current_pos] ; ## get the rigth pcgroup with maz postion in list | |
| 378 # print "---> Current PCGROUP is $pc\n" ; | |
| 379 if ( $pcgroups->{$pc}{'enrich_annotation'}{$mz} ) { | |
| 380 ## get record_ids | |
| 381 my @massbank_ids = @{ $pcgroups->{$pc}{'enrich_annotation'}{$mz} } ; ## get validated ids relative to one mz | |
| 382 $nb_ids = scalar (@massbank_ids) ; | |
| 383 # print "- - - NB RECORDS FOR MZ $mz = $nb_ids - - STATUS => \t" ; | |
| 384 my $massbank_ids_string = undef ; | |
| 385 ## manage empty array | |
| 386 if (!defined $nb_ids) { carp "The number of massbank ids is not defined\n" ; } | |
| 387 elsif ( $nb_ids > 0 ) { | |
| 388 ## get data from records and init_annotation | |
| 389 my $index_entries = 0 ; | |
| 390 foreach my $record_id (@massbank_ids) { | |
| 391 my $massbank_name = $records->{$record_id}{names}[0] ; | |
| 392 my $massbank_id = $record_id ; | |
| 393 my $massbank_formula = $records->{$record_id}{formula} ; | |
| 394 my $massbank_cpd_mz = $records->{$record_id}{exact_mz} ; | |
| 395 my $massbank_adduct = $records->{$record_id}{precursor_type} ; | |
| 396 my $massbank_score = 0 ; | |
| 397 | |
| 398 ## getting the score | |
| 399 my @filtered_records= @{ $pcgroups->{$pc}{'annotation'}{res} } ; | |
| 400 foreach my $record (@filtered_records) { | |
| 401 if ($record->{id} eq $massbank_id ) { | |
| 402 $massbank_score = $record->{score} ; | |
| 403 last ; | |
| 404 } | |
| 405 else { | |
| 406 next ; | |
| 407 } | |
| 408 } | |
| 409 | |
| 410 ## METLIN data display model | |
| 411 ## entry1= ENTRY_DELTA::ENTRY_ENTRY_NAME::ENTRY_CPD_MZ::ENTRY_FORMULA::ENTRY_ADDUCT::ENTRY_ENTRY_ID | entry2=VAR1::VAR2::VAR3::VAR4|... | |
| 412 my $massbank_id_string = $massbank_score.'::['."$massbank_name".']::'.$massbank_cpd_mz.'::'.$massbank_formula.'::['.$massbank_adduct.']::'.$massbank_id ; | |
| 413 | |
| 414 # manage final pipe | |
| 415 if ($index_entries < $nb_ids-1 ) { $massbank_ids_string .= $massbank_id_string.' | ' ; } | |
| 416 else { $massbank_ids_string .= $massbank_id_string ; } | |
| 417 $index_entries++; | |
| 418 } | |
| 419 } | |
| 420 elsif ( $nb_ids == 0 ) { $massbank_ids_string = 'NONE' ; } | |
| 421 else { | |
| 422 $massbank_ids_string = 'NONE' ; | |
| 423 } | |
| 424 # print "$massbank_ids_string\n" ; | |
| 425 push (@ids, $massbank_ids_string) ; | |
| 426 } ## End if | |
| 427 else { | |
| 428 next; | |
| 429 } | |
| 430 $current_pos++ ; | |
| 431 | |
| 432 push (@massbank_matrix, \@ids) ; | |
| 433 } ## End foreach mz | |
| 434 # print "* * * * Start of the MATRIX: * * * *\n" ; | |
| 435 # print Dumper @massbank_matrix ; | |
| 436 # print "* * * * END of the MATRIX * * * *\n" ; | |
| 437 return(\@massbank_matrix) ; | |
| 438 } | |
| 439 ## END of SUB | |
| 440 | |
| 441 =head2 METHOD add_massbank_matrix_to_input_matrix | |
| 442 | |
| 443 ## Description : build a full matrix (input + lm column) | |
| 444 ## Input : $input_matrix_object, $massbank_matrix_object | |
| 445 ## Output : $output_matrix_object | |
| 446 ## Usage : my ( $output_matrix_object ) = add_massbank_matrix_to_input_matrix( $input_matrix_object, $massbank_matrix_object ) ; | |
| 447 | |
| 448 =cut | |
| 449 ## START of SUB | |
| 450 sub add_massbank_matrix_to_input_matrix { | |
| 451 ## Retrieve Values | |
| 452 my $self = shift ; | |
| 453 my ( $input_matrix_object, $massbank_matrix_object ) = @_ ; | |
| 454 | |
| 455 my @output_matrix_object = () ; | |
| 456 my $index_row = 0 ; | |
| 457 | |
| 458 foreach my $row ( @{$input_matrix_object} ) { | |
| 459 my @init_row = @{$row} ; | |
| 460 | |
| 461 if ( $massbank_matrix_object->[$index_row] ) { | |
| 462 my $dim = scalar(@{$massbank_matrix_object->[$index_row]}) ; | |
| 463 | |
| 464 if ($dim > 1) { warn "the add method can't manage more than one column\n" ;} | |
| 465 my $lm_col = $massbank_matrix_object->[$index_row][$dim-1] ; | |
| 466 | |
| 467 push (@init_row, $lm_col) ; | |
| 468 $index_row++ ; | |
| 469 } | |
| 470 push (@output_matrix_object, \@init_row) ; | |
| 471 } | |
| 472 return(\@output_matrix_object) ; | |
| 473 } | |
| 474 ## END of SUB | |
| 475 | |
| 476 =head2 METHOD map_res_to_generic_json | |
| 477 | |
| 478 ## Description : build json structure with all massbank results | |
| 479 ## Input : $mzs, $pcs, $pcgroups_results | |
| 480 ## Output : $json_scalar | |
| 481 ## Usage : my ( $json_scalar ) = add_massbank_matrix_to_input_matrix( $mzs, $pcs, $pcgroups_results ) ; | |
| 482 | |
| 483 =cut | |
| 484 ## START of SUB | |
| 485 sub map_pc_to_generic_json { | |
| 486 my $self = shift; | |
| 487 my ( $pcs, $pcgroups, $records ) = @_ ; | |
| 488 | |
| 489 # print Dumper $pcgroups ; | |
| 490 # print Dumper $records ; | |
| 491 | |
| 492 ## JSON DESIGN | |
| 493 my %JSON = ( | |
| 494 QUERY => {}, | |
| 495 PARAM => {}, | |
| 496 TYPE => {} | |
| 497 ) ; | |
| 498 | |
| 499 my %oEntry = ( | |
| 500 mzmed => undef, | |
| 501 into => undef, | |
| 502 mzmin => undef, | |
| 503 mzmax => undef, | |
| 504 pcgroup => undef, | |
| 505 num_res => undef, | |
| 506 RECORDS => undef, | |
| 507 ) ; | |
| 508 | |
| 509 | |
| 510 my %oRecord = ( | |
| 511 id => undef, | |
| 512 exact_mz => undef, | |
| 513 score => undef, | |
| 514 formula => undef, | |
| 515 inchi => undef, | |
| 516 ms_type => undef, | |
| 517 precursor_type => undef, | |
| 518 instrument_type => undef, | |
| 519 name => undef, | |
| 520 peaks => undef, | |
| 521 ) ; | |
| 522 | |
| 523 | |
| 524 | |
| 525 foreach my $pc (@{$pcs}) { | |
| 526 | |
| 527 my $pc_res = {} ; | |
| 528 my $num_res = undef ; | |
| 529 | |
| 530 if ($pcgroups->{$pc}) { | |
| 531 my $pos = 0 ; | |
| 532 ## foreach mz of the pcgroup | |
| 533 foreach my $mz (@{ $pcgroups->{$pc}{mzmed} } ) { | |
| 534 | |
| 535 my %entry = %oEntry ; | |
| 536 ## | |
| 537 if ( defined $mz ) { $entry{mzmed} = $mz ; } | |
| 538 if ( $pcgroups->{$pc}{intervales}{$mz} ) { $entry{mzmin} = $pcgroups->{$pc}{intervales}{$mz}{min} ; } | |
| 539 if ( $pcgroups->{$pc}{intervales}{$mz} ) { $entry{mzmax} = $pcgroups->{$pc}{intervales}{$mz}{max} ; } | |
| 540 if ( $pcgroups->{$pc}{into}[$pos] ) { $entry{into} = $pcgroups->{$pc}{into}[$pos] ; } | |
| 541 if ( defined $pc ) { $entry{pcgroup} = $pc ; } | |
| 542 ## get RECORDS | |
| 543 if ( $pcgroups->{$pc}{enrich_annotation}{$mz} ) { | |
| 544 | |
| 545 my @recs = @{ $pcgroups->{$pc}{enrich_annotation}{$mz} } ; | |
| 546 $entry{num_res} = scalar(@recs) ; | |
| 547 | |
| 548 foreach my $recId (@recs) { | |
| 549 | |
| 550 my %record = %oRecord ; | |
| 551 if ( $records->{$recId} ) { $record{id} = $recId ; } | |
| 552 if ( $records->{$recId}{exact_mz} ) { $record{exact_mz} = $records->{$recId}{exact_mz} ; } | |
| 553 if ( $records->{$recId}{formula} ) { $record{formula} = $records->{$recId}{formula} ; } | |
| 554 if ( $records->{$recId}{ms_type} ) { $record{ms_type} = $records->{$recId}{ms_type} ; } | |
| 555 if ( $records->{$recId}{precursor_type} ) { $record{precursor_type} = $records->{$recId}{precursor_type} ; } | |
| 556 if ( $records->{$recId}{instrument_type} ) { $record{instrument_type} = $records->{$recId}{instrument_type} ; } | |
| 557 if ( $records->{$recId}{names} ) { $record{name} = $records->{$recId}{names}[0] ; } | |
| 558 if ( $records->{$recId}{inchi} ) { $record{inchi} = $records->{$recId}{inchi} ; } | |
| 559 ## peaks TODO... | |
| 560 | |
| 561 ## Score / BIG SHIT / | |
| 562 foreach my $record (@{ $pcgroups->{$pc}{'annotation'}{res} }) { | |
| 563 if ($record->{id} eq $recId ) { | |
| 564 $record{score} = $record->{score} ; | |
| 565 last ; | |
| 566 } | |
| 567 else { | |
| 568 next ; | |
| 569 } | |
| 570 } ## foreach record - - - for score | |
| 571 $entry{RECORDS}{$recId} = \%record ; | |
| 572 } ## foreach recId | |
| 573 } ## end IF | |
| 574 | |
| 575 $JSON{QUERY}{$mz} = \%entry ; | |
| 576 $pos ++ ; | |
| 577 } ## End FOREACH MZ | |
| 578 } | |
| 579 else { | |
| 580 warn "The pc group $pc doesn't exist in results !" ; | |
| 581 } | |
| 582 } | |
| 583 # print Dumper %JSON ; | |
| 584 return(\%JSON) ; | |
| 585 } | |
| 586 ## END of SUB | |
| 587 | |
| 588 | |
| 589 =head2 METHOD mapGroupsWithRecords | |
| 590 | |
| 591 ## Description : map records with pcgroups mz to adjust massbank id annotations | |
| 592 ## Input : $pcgroups, $records | |
| 593 ## Output : $pcgroups | |
| 594 ## Usage : my ( $var4 ) = mapGroupsWithRecords ( $$pcgroups, $records ) ; | |
| 595 | |
| 596 =cut | |
| 597 ## START of SUB | |
| 598 sub mapGroupsWithRecords { | |
| 599 ## Retrieve Values | |
| 600 my $self = shift ; | |
| 601 my ( $pcgroups, $records ) = @_; | |
| 602 | |
| 603 my %temp = () ; | |
| 604 my (%intervales, @annotation_ids) = ( (), () ) ; | |
| 605 | |
| 606 if ( ( defined $pcgroups ) and ( defined $records ) ) { | |
| 607 | |
| 608 %temp = %{$pcgroups} ; | |
| 609 my %unik_real_ids = () ; | |
| 610 my @real_ids = () ; | |
| 611 | |
| 612 foreach my $pc (keys %temp) { | |
| 613 | |
| 614 if ( $temp{$pc}{'intervales'} ) { %intervales = %{$temp{$pc}{'intervales'}} ; } | |
| 615 else { warn "Cant't find any intervale values\n" ; } | |
| 616 if ( $temp{$pc}{'massbank_ids'} ) { @annotation_ids = @{$temp{$pc}{'massbank_ids'}} ; } | |
| 617 else { warn "Cant't find any massbank id values\n" ; } | |
| 618 | |
| 619 # print Dumper %intervales; | |
| 620 # print Dumper @annotation_ids ; | |
| 621 | |
| 622 ## map with intervales | |
| 623 foreach my $mz (keys %intervales) { | |
| 624 my @filteredIds = () ; | |
| 625 my ( $min, $max ) = ( $intervales{$mz}{'min'}, $intervales{$mz}{'max'} ) ; | |
| 626 | |
| 627 foreach my $id (@annotation_ids) { | |
| 628 # print "Analyse mzs of id: $id...\n" ; | |
| 629 if ( (defined $id) and ( $records->{$id}) ) { | |
| 630 | |
| 631 my %currentRecord = %{$records->{$id}} ; | |
| 632 | |
| 633 if (scalar @{$currentRecord{'peaks'} } > 0 ) { | |
| 634 ## | |
| 635 foreach my $peak_mz (@{ $currentRecord{'peaks'} } ) { | |
| 636 if ($peak_mz) { | |
| 637 my $record_mz = $peak_mz->{'mz'} ; | |
| 638 if ( ($record_mz > $min ) and ($record_mz < $max) ){ | |
| 639 | |
| 640 if (!exists $unik_real_ids{$id}) { | |
| 641 $unik_real_ids{$id} = 1 ; | |
| 642 push (@filteredIds, $id) ; | |
| 643 # print "$mz - - $id\n" ; | |
| 644 } | |
| 645 | |
| 646 | |
| 647 } | |
| 648 else { | |
| 649 next ; | |
| 650 } | |
| 651 } | |
| 652 else { | |
| 653 warn "The mz field is not defined\n" ; | |
| 654 } | |
| 655 } ## foreach | |
| 656 } | |
| 657 else { | |
| 658 warn "The record ($id) has no peak\n" ; | |
| 659 } | |
| 660 } | |
| 661 else { | |
| 662 if (defined $id) { | |
| 663 warn "The id $id seems to be not present in getting records\n" ; | |
| 664 } | |
| 665 else { | |
| 666 warn "This catched id seems to be undef in getting records\n" ; | |
| 667 } | |
| 668 | |
| 669 next ; | |
| 670 } | |
| 671 } ## end foreach | |
| 672 ## to avoid multiple ids | |
| 673 # foreach my $id (keys %unik_real_ids) { | |
| 674 # push(@real_ids, $id) ; | |
| 675 # } | |
| 676 %unik_real_ids = () ; | |
| 677 # my @temp = @real_ids ; | |
| 678 my @temp = @filteredIds ; | |
| 679 $temp{$pc}{'enrich_annotation'}{$mz} = \@temp ; | |
| 680 @real_ids = () ; | |
| 681 @filteredIds = () ; | |
| 682 } ## End foreach mz | |
| 683 @annotation_ids = () ; | |
| 684 } ## End foreach pc | |
| 685 } | |
| 686 else { | |
| 687 warn"Can't find record or pcgroup data\n" ; | |
| 688 } | |
| 689 | |
| 690 return (\%temp) ; | |
| 691 } | |
| 692 ### END of SUB | |
| 693 | |
| 694 =head2 METHOD set_html_tbody_object | |
| 695 | |
| 696 ## Description : initializes and build the tbody object (perl array) needed to html template | |
| 697 ## Input : $nb_pages, $nb_items_per_page | |
| 698 ## Output : $tbody_object | |
| 699 ## Usage : my ( $tbody_object ) = set_html_tbody_object($nb_pages, $nb_items_per_page) ; | |
| 700 | |
| 701 =cut | |
| 702 ## START of SUB | |
| 703 sub set_html_tbody_object { | |
| 704 my $self = shift ; | |
| 705 my ( $nb_pages, $nb_items_per_page ) = @_ ; | |
| 706 | |
| 707 my ( @tbody_object ) = ( ) ; | |
| 708 | |
| 709 for ( my $i = 1 ; $i <= $nb_pages ; $i++ ) { | |
| 710 | |
| 711 my %pages = ( | |
| 712 # tbody feature | |
| 713 PAGE_NB => $i, | |
| 714 MASSES => [], ## end MASSES | |
| 715 ) ; ## end TBODY N | |
| 716 push (@tbody_object, \%pages) ; | |
| 717 } | |
| 718 return(\@tbody_object) ; | |
| 719 } | |
| 720 ## END of SUB | |
| 721 | |
| 722 =head2 METHOD add_mz_to_tbody_object | |
| 723 | |
| 724 ## Description : initializes and build the mz object (perl array) needed to html template | |
| 725 ## Input : $tbody_object, $nb_items_per_page, $mz_list | |
| 726 ## Output : $tbody_object | |
| 727 ## Usage : my ( $tbody_object ) = add_mz_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list ) ; | |
| 728 | |
| 729 =cut | |
| 730 ## START of SUB | |
| 731 sub add_mz_to_tbody_object { | |
| 732 my $self = shift ; | |
| 733 my ( $tbody_object, $nb_items_per_page, $mz_list, $json ) = @_ ; | |
| 734 | |
| 735 my ( $current_page, $mz_index ) = ( 0, 0 ) ; | |
| 736 | |
| 737 foreach my $page ( @{$tbody_object} ) { | |
| 738 | |
| 739 my @colors = ('white', 'green') ; | |
| 740 my ( $current_index, , $icolor ) = ( 0, 0 ) ; | |
| 741 | |
| 742 for ( my $i = 1 ; $i <= $nb_items_per_page ; $i++ ) { | |
| 743 # | |
| 744 if ( $current_index > $nb_items_per_page ) { ## manage exact mz per html page | |
| 745 $current_index = 0 ; | |
| 746 last ; ## | |
| 747 } | |
| 748 else { | |
| 749 $current_index++ ; | |
| 750 if ( $icolor > 1 ) { $icolor = 0 ; } | |
| 751 | |
| 752 if ( exists $mz_list->[$mz_index] ) { | |
| 753 | |
| 754 my %mz = ( | |
| 755 # mass feature | |
| 756 MASSES_ID_QUERY => "mz_0".sprintf("%04s", $mz_index+1 ) , | |
| 757 MASSES_MZ_QUERY => $mz_list->[$mz_index], | |
| 758 MASSES_PCGROUP_QUERY => $json->{QUERY}{ $mz_list->[$mz_index] }{pcgroup} , | |
| 759 MZ_COLOR => $colors[$icolor], | |
| 760 MASSES_NB => $mz_index+1, | |
| 761 ENTRIES => [] , | |
| 762 ) ; | |
| 763 push ( @{ $tbody_object->[$current_page]{MASSES} }, \%mz ) ; | |
| 764 # Html attr for mass | |
| 765 $icolor++ ; | |
| 766 } | |
| 767 } | |
| 768 $mz_index++ ; | |
| 769 } ## foreach mz | |
| 770 | |
| 771 $current_page++ ; | |
| 772 } | |
| 773 return($tbody_object) ; | |
| 774 } | |
| 775 ## END of SUB | |
| 776 | |
| 777 =head2 METHOD add_entries_to_tbody_object | |
| 778 | |
| 779 ## Description : initializes and build the entries object (perl array) needed to html template | |
| 780 ## Input : $tbody_object, $nb_items_per_page, $mz_list, $entries | |
| 781 ## Output : $tbody_object | |
| 782 ## Usage : my ( $tbody_object ) = add_entries_to_tbody_object( $tbody_object, $nb_items_per_page, $mz_list, $entries ) ; | |
| 783 | |
| 784 =cut | |
| 785 ## START of SUB | |
| 786 sub add_entries_to_tbody_object { | |
| 787 ## Retrieve Values | |
| 788 my $self = shift ; | |
| 789 my ( $tbody_object, $nb_items_per_page, $mz_list, $JSON ) = @_ ; | |
| 790 | |
| 791 my $index_page = 0 ; | |
| 792 my $index_mz_continous = 0 ; | |
| 793 | |
| 794 foreach my $page (@{$tbody_object}) { | |
| 795 | |
| 796 my $index_mz = 0 ; | |
| 797 | |
| 798 foreach my $mz (@{ $tbody_object->[$index_page]{MASSES} }) { | |
| 799 my $index_entry = 0 ; | |
| 800 my $check_noentry = 0 ; | |
| 801 my @toSort = () ; | |
| 802 | |
| 803 foreach my $record (keys %{ $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS} }) { | |
| 804 $check_noentry ++ ; | |
| 805 | |
| 806 my %entry = ( | |
| 807 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR}, | |
| 808 ENTRY_ENTRY_NAME => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{name}, | |
| 809 ENTRY_ENTRY_ID => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{id}, | |
| 810 ENTRY_ENTRY_ID2 => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{id}, | |
| 811 ENTRY_FORMULA => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{formula}, | |
| 812 ENTRY_CPD_MZ => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{exact_mz}, | |
| 813 ENTRY_MS_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{ms_type}, | |
| 814 ENTRY_PRECURSOR_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{precursor_type}, | |
| 815 ENTRY_INSTRUMENT_TYPE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{instrument_type}, | |
| 816 ENTRY_SCORE => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{score}, | |
| 817 ENTRY_ENTRY_INCHI => $JSON->{QUERY}{$mz->{MASSES_MZ_QUERY}}{RECORDS}{$record}{inchi}, | |
| 818 ) ; | |
| 819 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ; | |
| 820 | |
| 821 $index_entry++ ; | |
| 822 } ## end foreach record | |
| 823 if ($check_noentry == 0 ) { | |
| 824 my %entry = ( | |
| 825 ENTRY_COLOR => $tbody_object->[$index_page]{MASSES}[$index_mz]{MZ_COLOR}, | |
| 826 ENTRY_ENTRY_NAME => 'UNKNOWN', | |
| 827 ENTRY_ENTRY_ID => 'NONE', | |
| 828 ENTRY_ENTRY_ID2 => '', | |
| 829 ENTRY_FORMULA => 'n/a', | |
| 830 ENTRY_CPD_MZ => 'n/a', | |
| 831 ENTRY_MS_TYPE => 'n/a', | |
| 832 ENTRY_PRECURSOR_TYPE => 'n/a', | |
| 833 ENTRY_INSTRUMENT_TYPE => 'n/a', | |
| 834 ENTRY_SCORE => 0, | |
| 835 ENTRY_ENTRY_INCHI => 'n/a', | |
| 836 ) ; | |
| 837 push ( @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} }, \%entry) ; | |
| 838 } | |
| 839 | |
| 840 ## sorted by score | |
| 841 my @sorted = () ; | |
| 842 my @temp = @{ $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} } ; | |
| 843 if (scalar (@temp) > 1 ) { ## for mz without record (only one entry with NA or 0 values) | |
| 844 @sorted = sort { $b->{ENTRY_SCORE} <=> $a->{ENTRY_SCORE} } @temp; | |
| 845 } | |
| 846 else { | |
| 847 @sorted = @temp; | |
| 848 } | |
| 849 | |
| 850 $tbody_object->[$index_page]{MASSES}[$index_mz]{ENTRIES} = \@sorted ; | |
| 851 | |
| 852 $index_mz ++ ; | |
| 853 $index_mz_continous ++ ; | |
| 854 | |
| 855 } ## End foreach mz | |
| 856 $index_page++ ; | |
| 857 | |
| 858 } ## End foreach page | |
| 859 # print Dumper $tbody_object ; | |
| 860 return($tbody_object) ; | |
| 861 } | |
| 862 ## END of SUB | |
| 863 | |
| 864 | |
| 865 | |
| 866 1 ; | |
| 867 | |
| 868 | |
| 869 __END__ | |
| 870 | |
| 871 =head1 SUPPORT | |
| 872 | |
| 873 You can find documentation for this module with the perldoc command. | |
| 874 | |
| 875 perldoc XXX.pm | |
| 876 | |
| 877 =head1 Exports | |
| 878 | |
| 879 =over 4 | |
| 880 | |
| 881 =item :ALL is ... | |
| 882 | |
| 883 =back | |
| 884 | |
| 885 =head1 AUTHOR | |
| 886 | |
| 887 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
| 888 | |
| 889 =head1 LICENSE | |
| 890 | |
| 891 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
| 892 | |
| 893 =head1 VERSION | |
| 894 | |
| 895 version 1 : xx / xx / 201x | |
| 896 | |
| 897 version 2 : ?? | |
| 898 | |
| 899 =cut |
