comparison lib/msp.pm @ 0:e3d43b8c987b draft

Init repository with last tool-bank-golm-lib_search master version
author fgiacomoni
date Mon, 05 Dec 2016 08:32:04 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:e3d43b8c987b
1 package lib::msp ;
2
3 use strict;
4 use warnings ;
5 use Exporter ;
6 use Carp ;
7
8 use Data::Dumper ;
9 use List::MoreUtils qw(uniq);
10
11 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);
12
13 our $VERSION = "1.0";
14 our @ISA = qw(Exporter);
15 our @EXPORT = qw( get_mzs get_intensities get_masses_from_string get_intensities_from_string keep_only_max_masses keep_only_max_intensities encode_spectrum_for_query sorting_descending_intensities round_num apply_relative_intensity remove_redundants);
16 our %EXPORT_TAGS = ( ALL => [qw( get_mzs get_intensities get_masses_from_string get_intensities_from_string keep_only_max_masses keep_only_max_intensities encode_spectrum_for_query sorting_descending_intensities round_num apply_relative_intensity remove_redundants)] );
17
18 =head1 NAME
19
20 My::Module - An example module
21
22 =head1 SYNOPSIS
23
24 use My::Module;
25 my $object = My::Module->new();
26 print $object->as_string;
27
28 =head1 DESCRIPTION
29
30 This module does not really exist, it
31 was made for the sole purpose of
32 demonstrating how POD works.
33
34 =head1 METHODS
35
36 Methods are :
37
38 =head2 METHOD new
39
40 ## Description : new
41 ## Input : $self
42 ## Ouput : bless $self ;
43 ## Usage : new() ;
44
45 =cut
46
47 sub new {
48 ## Variables
49 my $self={};
50 bless($self) ;
51 return $self ;
52 }
53 ### END of SUB
54
55
56
57 =head2 METHOD get_mzs
58
59 ## Description : parse msp file and get mzs
60 ## Input : $msp_file, $mzRes, $maxIon
61 ## Output : \@total_spectra_mzs
62 ## Usage : my ( $mzs ) = get_mzs( $msp_file , $mzRes, $maxIon) ;
63 ## Structure of res: [ $arr_ref1 , $arr_ref2 ... $arr_refN ]
64 =cut
65 ## START of SUB
66 sub get_mzs {
67 ## Retrieve Values
68 my $self = shift ;
69 my ( $msp_file, $mzRes ) = @_ ;
70
71 my @ions = () ;
72 my @temp_mzs = () ;
73 my @uniq_masses ;
74 my @mzs = ();
75 my @total_spectra_mzs = ();
76 my $mz ;
77 my $i = 0 ;
78
79 open (MSP , "<" , $msp_file) or die $! ;
80
81 {
82 local $/ = 'Name' ;
83 my @infos = () ;
84 # One line is : "Name -> Name" englobing a whole spectrum with all infos
85 while(my $line = <MSP>) {
86
87 chomp $line;
88 @infos = split (/\n/ , $line) ;
89 # Loop over all lines of a spectrum
90 for (my $i=0 ; $i<@infos ; $i++) {
91 # Detect spectrum lines only
92 if ($infos[$i] =~ /(\d+\.?\d*)\s+(\d+\.?\d*)\s*;\s*/) {
93
94 @ions = split ( /;/ , $infos[$i] ) ;
95 # Retrieve mzs according to maxIons value
96 foreach my $ion (@ions) {
97
98 if ($ion =~ /^\s*(\d+\.?\d*)\s+(\d+\.?\d*)$/) {
99
100 $mz = $1 ;
101 # Truncate/round mzs depending on $mzRes wanted
102 if ($mzRes == 0) {
103 my $mz_rounded = sprintf("%.".$mzRes."f", $mz) ;
104 push (@temp_mzs , $mz_rounded) ;
105 }
106 # Check that $mzRes is not greater than the number of digits after comma
107 elsif ($mzRes > 0) {
108 if ($mz !~ /^\d+\.\d+$/) { croak "*********\n\nYou are trying to specify $mzRes significant decimals, but one or more masses in the input file are unitary masses.\nYou should try again with mzRes = 0\n\n\n"; }
109 elsif($mzRes > length(( $mz =~ /.+\.(.*)/)[0] )) {
110 $mz = sprintf("%.".$mzRes."f" , $mz) ;
111 }
112 my $mz_rounded = _round_num($mz,$mzRes) ;
113 push (@temp_mzs , $$mz_rounded) ;
114 }
115 }
116 }
117 }
118 }
119 if($line ne '') {
120 @{ $total_spectra_mzs[$i] } = @temp_mzs ;
121 $i++ ;
122 @temp_mzs = () ;
123 }
124 }
125 }
126 #print Dumper \@total_spectra_mzs ;
127 close (MSP) ;
128 return(\@total_spectra_mzs) ;
129 }
130 ## END of SUB
131
132
133
134
135 =head2 METHOD get_intensities
136
137 ## Description : parse msp file and get intensities
138 ## Input : $msp_file, $maxIons
139 ## Output : \@total_spectra_intensities
140 ## Usage : my ( $intensities ) = get_mzs( $msp_file, $maxIons ) ;
141 ## Structure of res: [ $arr_ref1 , $arr_ref2 ... $arr_refN ]
142 =cut
143 ## START of SUB
144 sub get_intensities {
145 ## Retrieve Values
146 my $self = shift ;
147 my ( $msp_file ) = @_ ;
148
149 my @ions = () ;
150 my @temp_intensities = () ;
151 my @intensities = () ;
152 my @total_spectra_intensities = ();
153 my $i = 0 ;
154
155 open (MSP , "<" , $msp_file) or die $! ;
156
157 {
158 local $/ = 'Name' ;
159 my @infos = () ;
160 # Extract spectrum
161 while(my $line = <MSP>) {
162 chomp $line;
163 @infos = split (/\n/ , $line) ;
164 #Detect spectrum
165 for (my $i=0 ; $i<@infos ; $i++) {
166 if ($infos[$i] =~ /(\d+\.?\d*)\s+(\d+\.?\d*)\s*;\s*?/) {
167 @ions = split ( /;/ , $infos[$i] ) ;
168 # Retrieve intensities
169 foreach my $ion (@ions) {
170 if ($ion =~ /^\s*(\d+\.?\d*)\s+(\d+\.?\d*)$/) {
171 my $intensity = $2 ;
172 push ( @temp_intensities , $intensity ) ;
173 }
174 }
175 }
176 }
177 if($line ne '') {
178 @{ $total_spectra_intensities[$i] } = @temp_intensities ;
179 $i++ ;
180 @temp_intensities = () ;
181 }
182 }
183 }
184 close (MSP) ;
185 return(\@total_spectra_intensities) ;
186 }
187 ## END of SUB
188
189
190 =head2 METHOD get_masses_from_string
191
192 ## Description : parse a spectrum string and get mzs and intensities
193 ## Input : $spectrum_string, $mzRes
194 ## Output : \@spectrum_intensities_mzs
195 ## Usage : my ( $spectrum_mzs ) = get_masses_from_string( $spectrum_string , $mzRes ) ;
196 =cut
197 ## START of SUB
198 sub get_masses_from_string {
199 ## Retrieve Values
200 my $self = shift ;
201 my ( $spectrum_string, $mzRes ) = @_ ;
202
203 my @intensities = () ;
204 my @mzs = () ;
205
206 if (defined $spectrum_string) {
207
208 if ($spectrum_string ne '') {
209
210 if ($spectrum_string =~ /\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*/ ) {
211
212 my @val = split (/\s+/ , $spectrum_string) ;
213 for (my $i=0 ; $i<@val ; $i++) {
214 if ($i%2 == 0) {
215 my $mz = $val[$i] ;
216 # Truncate/round mzs depending on $mzRes wanted
217 if ($mzRes == 0) {
218 $mz = int($mz) ;
219 push ( @mzs , $val[$i] ) ;
220 }
221 # Check that $mzRes is not greater than the number of digits after comma
222 elsif ($mzRes > 0) {
223 if($mzRes > length(( $mz =~ /.+\.(.*)/)[0] )) {
224 $mz = sprintf("%.".$mzRes."f" , $mz) ;
225 }
226 my $mz_rounded = _round_num($mz,$mzRes) ;
227 push ( @mzs , $$mz_rounded ) ;
228 }
229 }
230 }
231 return (\@mzs) ;
232 }
233 else { croak "Wrong format of the spectrum. See help\n" }
234 }
235 else { croak "Spectrum is empty, the service will stop\n" } ;
236 }
237 else { croak "Spectrum is not defined, service will stop\n" } ;
238 }
239 ## END of SUB
240
241
242
243 =head2 METHOD get_intensities_from_string
244
245 ## Description : parse a spectrum string and get intensities
246 ## Input : $spectrum_string
247 ## Output : \@spectrum_intensities
248 ## Usage : my ( $spectrum_intensities ) = get_intensities_from_string( $spectrum_string ) ;
249 =cut
250 ## START of SUB
251 sub get_intensities_from_string {
252 ## Retrieve Values
253 my $self = shift ;
254 my ( $spectrum_string ) = @_ ;
255
256 my @intensities = () ;
257 my @mzs = () ;
258
259 if (defined $spectrum_string) {
260
261 if ($spectrum_string ne '') {
262
263 if ($spectrum_string =~ /\s*(\d+\.?\d*)\s+(\d+\.?\d*)\s*/ ) {
264
265 my @val = split (/\s+/ , $spectrum_string) ;
266 for (my $i=0 ; $i<@val ; $i++) {
267 if ($i%2 != 0) {
268 my $int = $val[$i] ;
269 push ( @intensities , $int ) ;
270 }
271 }
272 return (\@intensities) ;
273 }
274 else { croak "Wrong format of the spectrum. See help\n" }
275 }
276 else { croak "Spectrum is empty, the service will stop\n" } ;
277 }
278 else { croak "Spectrum is not defined, service will stop\n" } ;
279 }
280 ## END of SUB
281
282
283
284
285
286 =head2 METHOD sorting_descending_intensities
287
288 ## Description : sort mzs and intensities arrays by descending intensity values
289 ## Input : $ref_mzs_res, $ref_ints_res
290 ## Output : \@mzs_res, \@ints_res
291 ## Usage : my ( \@mzs_res, \@ints_res ) = sorting_descending_intensities( $ref_mzs_res, $ref_ints_res ) ;
292 =cut
293 ## START of SUB
294 sub sorting_descending_intensities {
295 ## Retrieve Values
296 my $self = shift ;
297 my ( $ref_mzs_res, $ref_ints_res ) = @_ ;
298
299 my @mzs_res = () ;
300 my @ints_res = () ;
301
302 if ( defined $ref_mzs_res && defined $ref_ints_res ) {
303 if ( (scalar @$ref_mzs_res) != 0 && (scalar @$ref_ints_res) != 0 ) {
304
305 @mzs_res = @$ref_mzs_res ;
306 @ints_res = @$ref_ints_res ;
307
308 # Case when we have only one array of masses (input is a string of masses and not a file)
309 if ( ref(@$ref_ints_res[0]) ne "ARRAY") {
310
311 my @sorted_indices = sort { $ints_res[$b] <=> $ints_res[$a] } 0..$#ints_res;
312 @$_ = @{$_}[@sorted_indices] for \(@mzs_res, @ints_res);
313
314 }
315 else {
316 ## Sorting ions by decreasing intensity values
317 for (my $i=0 ; $i<@ints_res ; $i++) {
318 my @sorted_indices = sort { @{$ints_res[$i]}[$b] <=> @{$ints_res[$i]}[$a] } 0..$#{$ints_res[$i]};
319 @$_ = @{$_}[@sorted_indices] for \(@{$ints_res[$i]},@{$mzs_res[$i]});
320 }
321 }
322 }
323 else { carp "Cannot sort intensities, mzs or intensities are empty" ; return (\@mzs_res, \@ints_res) ; }
324 }
325 else { carp "Cannot sort intensities, mzs or intensities are undef" ; return (\@mzs_res, \@ints_res) ; }
326
327 return (\@mzs_res, \@ints_res) ;
328 }
329 ## END of SUB
330
331
332
333
334 =head2 METHOD keep_only_max_masses
335
336 ## Description : keep only $maxIons masses
337 ## Input : $mzs_res_sorted, $maxIons
338 ## Output : \@mzs
339 ## Usage : my ( $mzs ) = keep_only_max_masses( $mzs_res_sorted, $ints_res_sorted, $maxIons ) ;
340 =cut
341 ## START of SUB
342 sub keep_only_max_masses {
343 ## Retrieve Values
344 my $self = shift ;
345 my ( $ref_mzs_res, $maxIons ) = @_ ;
346
347 my @mzs = () ;
348 my @tot_mzs = () ;
349
350 if ( ref(@$ref_mzs_res[0]) ne "ARRAY") {
351 my $i = 0 ;
352 while (scalar @tot_mzs < $maxIons && $i < @$ref_mzs_res){
353 push (@tot_mzs , $$ref_mzs_res[$i++]) ;
354 }
355 }
356 else {
357 for (my $i=0 ; $i<@$ref_mzs_res ; $i++) {
358 my $j = 0 ;
359 while (scalar @mzs < $maxIons && $j < @$ref_mzs_res[$i]){
360 push (@mzs , $ref_mzs_res->[$i][$j++]) ;
361 }
362 push (@tot_mzs , \@mzs) ;
363 }
364 }
365 return (\@tot_mzs) ;
366 }
367 ## END of SUB
368
369
370
371
372 =head2 METHOD keep_only_max_intensities
373
374 ## Description : keep only $maxIons intensities
375 ## Input : $ints_res_sorted, $maxIons
376 ## Output : \@ints
377 ## Usage : my ( $ints ) = keep_only_max_intensities( $ints_res_sorted, $maxIons ) ;
378 =cut
379 ## START of SUB
380 sub keep_only_max_intensities {
381 ## Retrieve Values
382 my $self = shift ;
383 my ( $ref_ints_res, $maxIons ) = @_ ;
384
385 my @ints = () ;
386 my @tot_ints = () ;
387 if ( ref(@$ref_ints_res[0]) ne "ARRAY") {
388 my $i = 0 ;
389 while (scalar @tot_ints < $maxIons && $i < @$ref_ints_res){
390 push (@tot_ints , $$ref_ints_res[$i++]) ;
391 }
392 }
393 else {
394 for (my $i=0 ; $i<@$ref_ints_res ; $i++) {
395 my $j = 0 ;
396 while (scalar @ints < $maxIons && $j < @$ref_ints_res[$i]){
397 push (@ints , $ref_ints_res->[$i][$j++]) ;
398 }
399 push (@tot_ints , \@ints) ;
400 }
401 }
402 return (\@tot_ints) ;
403 }
404 ## END of SUB
405
406
407
408
409
410 =head2 METHOD encode_spectrum_for_query
411
412 ## Description : get mzs and intensities values and generate the spectra strings formatted for the WS query (html)
413 ## Input : $mzs, $intensities
414 ## Output : \@encoded_spectra
415 ## Usage : my ( $encoded_spectra ) = get_spectra( $mzs, $intensities ) ;
416
417 =cut
418 ## START of SUB
419 sub encode_spectrum_for_query {
420 ## Retrieve Values
421 my $self = shift ;
422 my ( $mzs, $intensities ) = @_ ;
423
424 my @encoded_spectra = () ;
425 my $spectrum = "" ;
426 my $k = 0 ;
427
428 #print Dumper $mzs ;
429
430 if ( defined $mzs && defined $intensities ) {
431 if ( @$mzs && @$intensities ) {
432
433 # Case when we have only one array of masses (input is a string of masses and not a file)
434 if ( ref(@$mzs[0]) ne "ARRAY") {
435 for (my $i=0 ; $i< @$mzs ; $i++) {
436 $spectrum = $spectrum . @$mzs[$i] . " " . @$intensities[$i] . " ";
437 }
438 push ( @encoded_spectra , $spectrum ) ;
439 }
440 else {
441 for (my $i=0 ; $i< @$mzs ; $i++) {
442
443 for ( my $j=0 ; $j< @{ @$mzs[$i] } ; $j++ ) {
444
445 $spectrum = $spectrum . $$mzs[$i][$j] . " " . $$intensities[$i][$j] . " ";
446 }
447 $encoded_spectra[$k] = $spectrum ;
448 $k++ ;
449 $spectrum = '' ;
450 }
451 }
452 }
453 else { carp "Cannot encode spectrum, mzs and intensities arrays are empty" ; return \@encoded_spectra ; }
454 }
455 else { carp "Cannot encode spectrum, mzs and intensities are undef" ; return \@encoded_spectra ; }
456 return \@encoded_spectra ;
457 }
458 ## END of SUB
459
460
461 =head2 METHOD round_num
462
463 ## Description : round a number by the sended decimal
464 ## Input : $number, $decimal
465 ## Output : $round_num
466 ## Usage : my ( $round_num ) = round_num( $number, $decimal ) ;
467
468 =cut
469 ## START of SUB
470 sub _round_num {
471 ## Retrieve Values
472 my ( $number, $decimal ) = @_ ;
473 my $round_num = 0 ;
474
475 if ( ( defined $decimal ) and ( $decimal > 0 ) and ( defined $number ) and ( $number > 0 ) ) {
476 $round_num = sprintf("%.".$decimal."f", $number); ## a rounding is used : 5.3 -> 5 and 5.5 -> 6
477 }
478 else {
479 croak "Can't round any number : missing value or decimal\n" ;
480 }
481
482 return(\$round_num) ;
483 }
484 ## END of SUB
485
486
487
488 =head2 METHOD apply_relative_intensity
489
490 ## Description : transform absolute intensities into relative intensities
491 ## Input : $intensities
492 ## Output : \@intensities
493 ## Usage : my ( $intensities ) = apply_relative_intensity( $intensities ) ;
494
495 =cut
496 ## START of SUB
497 sub apply_relative_intensity {
498 ## Retrieve Values
499 my $self = shift ;
500 my ($intensities) = @_ ;
501
502 my @intensities = @$intensities ;
503 my @relative_intensities ;
504
505 foreach my $ints (@intensities) {
506 my @relative_ints = map { ($_ * 100)/@$ints[0] } @$ints ;
507 push (@relative_intensities , \@relative_ints) ;
508 }
509 return \@relative_intensities ;
510 }
511 ## END of SUB
512
513
514
515 =head2 METHOD remove_redundants
516
517 ## Description : removes ions with redundant masses
518 ## Input : $masses $intensities
519 ## Output : \@intensities
520 ## Usage : my ( $uniq_masses, $uniq_intensities ) = remove_redundants( $masses, $intensities ) ;
521
522 =cut
523 ## START of SUB
524 sub remove_redundants {
525 ## Retrieve Values
526 my $self = shift ;
527 my ($masses, $intensities) = @_ ;
528
529 my %uniq = () ;
530 my @uniq_intensities = () ;
531
532 ## Create hash with key = mass and value = intensity
533 for (my $i=0 ; $i<@$masses ; $i++) {
534 $uniq{ @$masses[$i] } = @$intensities[$i] ;
535 }
536
537 ## Remove redundant masses
538 my @uniq_masses = uniq(@$masses) ;
539
540 ## Keep intensities corresponding to uniq masses
541 foreach my $mass (@uniq_masses) {
542 push (@uniq_intensities , $uniq{ $mass }) ;
543 }
544
545 return (\@uniq_masses , \@uniq_intensities) ;
546
547 }
548 ## END of SUB
549
550
551 #********************************************************************************************************
552 # FONCTION DU SEUIL POUR LE BRUIT, A DECOMMENTER SI FINALEMENT CE N'EST PAS GERE DANS LA BRIQUE MetaMS
553 #********************************************************************************************************
554
555
556 =head2 METHOD keep_ions_above_threshold
557
558 ## Description : keep only ions which intensities are above the threshold
559 ## Input : $mzs_res_sorted, $ints_res_sorted, $noiseThreshold
560 ## Output : $mzs_res_noise_threshold, $ints_res_noise_threshold
561 ## Usage : my ( $mzs_res_noise_threshold, $ints_res_noise_threshold ) = keep_ions_above_threshold( $mzs_res_sorted, $ints_res_sorted, $noiseThreshold ) ;
562
563 =cut
564 ## START of SUB
565 #sub keep_ions_above_threshold {
566 # ## Retrieve Values
567 # my $self = shift ;
568 # my ($mzs_res_sorted, $ints_res_sorted, $noiseThreshold) = @_ ;
569 #
570 # my (@mzs_res_noise_threshold, @ints_res_noise_threshold) = ( (),() ) ;
571 # my (@mzs_res_noise_threshold_temp, @ints_res_noise_threshold_temp) = ( (),() ) ;
572 # my $i = 0 ;
573 # my $j = 0 ;
574 # # Case when we have only one array of masses (input is a string of masses and not a file)
575 # if ( ref(@$mzs_res_sorted[0]) ne "ARRAY") {
576 #
577 # while( @$ints_res_sorted[$i] > $noiseThreshold && $i < scalar @$mzs_res_sorted) {
578 #
579 # push ( @mzs_res_noise_threshold , @$mzs_res_sorted[$i] ) ;
580 # push ( @ints_res_noise_threshold , @$ints_res_sorted[$i] ) ;
581 # $i++ ;
582 # }
583 # }
584 # else {
585 # while( $i < @$ints_res_sorted ) {
586 #
587 # while( $$ints_res_sorted[$i][$j] > $noiseThreshold && $j < scalar @$ints_res_sorted[$i]) {
588 #
589 # push ( @mzs_res_noise_threshold_temp , $$mzs_res_sorted[$i][$j] ) ;
590 # push ( @ints_res_noise_threshold_temp , $$ints_res_sorted[$i][$j] ) ;
591 # $j++ ;
592 # }
593 # push ( @mzs_res_noise_threshold , \@mzs_res_noise_threshold_temp ) ;
594 # push ( @ints_res_noise_threshold , \@ints_res_noise_threshold_temp ) ;
595 # $i++ ;
596 # }
597 # }
598 #
599 # return (\@mzs_res_noise_threshold, \@ints_res_noise_threshold) ;
600 #}
601 ## END of SUB
602
603
604 #********************************************************************************************************
605 #********************************************************************************************************
606 #********************************************************************************************************
607
608
609 1 ;
610
611
612 __END__
613
614 =head1 SUPPORT
615
616 You can find documentation for this module with the perldoc command.
617
618 perldoc csv.pm
619
620 =head1 Exports
621
622 =over 4
623
624 =item :ALL is get_spectra
625
626 =back
627
628 =head1 AUTHOR
629
630 Gabriel Cretin E<lt>gabriel.cretin@clermont.inra.frE<gt>
631
632 =head1 LICENSE
633
634 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
635
636 =head1 VERSION
637
638 version 1 : 03 / 06 / 2016
639
640 version 2 : 24 / 06 / 2016
641
642 =cut