Mercurial > repos > fgiacomoni > massbank_ws_searchspectrum
comparison lib/massbank_parser.pm @ 0:023c380900ef draft default tip
Init repository with last massbank_ws_searchspectrum master version
| author | fgiacomoni |
|---|---|
| date | Wed, 19 Apr 2017 11:31:58 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:023c380900ef |
|---|---|
| 1 package lib::massbank_parser ; | |
| 2 | |
| 3 use strict; | |
| 4 use warnings ; | |
| 5 use Exporter ; | |
| 6 use Carp ; | |
| 7 | |
| 8 use File::Basename; | |
| 9 | |
| 10 use Data::Dumper ; | |
| 11 | |
## Package-level metadata and Exporter configuration.
use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS);

# Module version string.
our $VERSION = "1.0";

# Inherit import() from Exporter.
our @ISA = ('Exporter');

# Symbols exported by default.
our @EXPORT = ( 'getChemNamesFromString', 'getPeaksFromString' );

# The :ALL tag lists the same two symbols as the default export list.
our %EXPORT_TAGS = ( ALL => [ @EXPORT ] );
| 18 | |
| 19 =head1 NAME | |
| 20 | |
| 21 lib::massbank_parser - Parser for MassBank record files and record strings | |
| 22 | |
| 23 =head1 SYNOPSIS | |
| 24 | |
| 25 use parser::chem::massbank ; | |
| 26 my $object = parser::chem::massbank->new(); | |
| 27 print $object->as_string; | |
| 28 | |
| 29 =head1 DESCRIPTION | |
| 30 | |
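| 31 This module extracts fields from MassBank records, read either from | |
| 32 a file on disk or from a record held in a string: instrument, MS and | |
| 33 chromatography conditions, spectrum, peaks, annotations and links. | |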
| 34 | |
| 35 =head1 METHODS | |
| 36 | |
| 37 Methods are : | |
| 38 | |
| 39 =head2 METHOD new | |
| 40 | |
| 41 ## Description : new | |
| 42 ## Input : $self | |
| 43 ## Output : bless $self ; | |
| 44 ## Usage : new() ; | |
| 45 | |
| 46 =cut | |
| 47 | |
## Constructor.
## Input  : the invocant (class name or existing object); may be omitted
## Output : a new empty hash-based object
## Usage  : my $parser = lib::massbank_parser->new() ;
sub new {
    my ($invocant) = @_ ;

    # Two-argument bless so that subclasses get objects of their own class.
    # An object invocant yields its class; no invocant at all (plain function
    # call) falls back to this package, which is what one-argument bless did.
    my $class = ref($invocant) || $invocant || __PACKAGE__ ;

    return bless( {}, $class ) ;
}
| 54 ### END of SUB | |
| 55 | |
| 56 =head2 METHOD get_list_of_analysis_intrument_names | |
| 57 | |
| 58 ## Description : returns the list of unique names of the instruments used | |
| 59 ## Input : $dir, $ms_files (a list of files) | |
| 60 ## Output : $names | |
| 61 ## Usage : my ( $names ) = get_list_of_analysis_intrument_names( $ms_files ) ; | |
| 62 | |
| 63 =cut | |
| 64 ## START of SUB | |
## Collect the distinct AC$INSTRUMENT values found in a set of MassBank files.
## Input  : $dir (directory holding the files), $ms_files (array ref of file names)
## Output : array ref of instrument names, in order of first appearance; each
##          name is the raw text following "AC$INSTRUMENT:" (leading blank kept)
## Raises : croaks when a file path is undefined or does not exist,
##          dies when an existing file cannot be opened
sub get_list_of_analysis_intrument_names {
    my $self = shift ;
    my ( $dir, $ms_files ) = @_ ;

    # Loaded here so the block does not depend on a file-level import.
    require File::Spec ;

    my %seen ;
    my @names ;

    foreach my $ms_file ( @{$ms_files} ) {
        # Build the path portably; the directory separator was previously
        # hard-coded to a backslash, which only works on Windows.
        my $file = ( defined $dir and defined $ms_file )
            ? File::Spec->catfile( $dir, $ms_file )
            : undef ;

        if ( !defined $file or !-e $file ) {
            croak "Can't work with a undef / none existing massbank file\n" ;
        }

        # Three-argument open with a lexical handle: the file name is never
        # interpreted as a mode or a pipe.
        open( my $fh, '<', $file ) or die "Cant' read the file $file\n" ;
        while ( my $field = <$fh> ) {
            chomp $field ;
            next unless $field =~ /AC\$INSTRUMENT:(.*)/ ;
            my $name = $1 ;

            # An already-known instrument stops the scan of this file.
            last if $seen{$name} ;

            $seen{$name} = 1 ;
            push( @names, $name ) ;
        }
        close($fh) ;
    }
    return ( \@names ) ;
}
| 89 ## END of SUB | |
| 90 | |
| 91 =head2 METHOD get_analysis_instruments_data | |
| 92 | |
| 93 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 94 ## Input : $ms_file | |
| 95 ## Output : $features | |
| 96 ## Usage : my ( $features ) = get_analysis_instruments_data( $ms_file ) ; | |
| 97 | |
| 98 =cut | |
| 99 ## START of SUB | |
## Read the instrument name and type from a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : hash ref with keys 'name' and 'type' (undef for a key whose line
##          is absent); an empty hash ref when neither line is present
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_analysis_instruments_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my %features = ( 'name' => undef, 'type' => undef ) ;
    my $matched  = 0 ;

    # Lexical handle + three-argument open instead of the global bareword
    # handle MS and mode-in-filename form.
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /AC\$INSTRUMENT: (.*)/ ) {
            $features{'name'} = $1 ;
            $matched++ ;
        }
        elsif ( $field =~ /AC\$INSTRUMENT_TYPE: (.*)/ ) {
            $features{'type'} = $1 ;
            $matched++ ;
        }
    }
    close($fh) ;

    # No instrument line at all: hand back an empty hash rather than undef values.
    %features = () if $matched == 0 ;
    return ( \%features ) ;
}
| 126 ## END of SUB | |
| 127 | |
| 128 =head2 METHOD get_ms_methods_data | |
| 129 | |
| 130 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 131 ## Input : $ms_file | |
| 132 ## Output : $features | |
| 133 ## Usage : my ( $features ) = get_ms_methods_data( $ms_file ) ; | |
| 134 | |
| 135 =cut | |
| 136 ## START of SUB | |
## Read the AC$MASS_SPECTROMETRY sub-fields from a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : hash ref keyed by ion_mode, ms_type, collision_energy, collision_gas,
##          desolvation_gas_flow, desolvation_temperature, ionization_energy,
##          laser, matrix, mass_accuracy, reagent_gas, scanning. Each value is
##          the raw text following the sub-tag (leading blank kept) or undef.
##          An empty hash ref is returned when no sub-field is found.
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_ms_methods_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Ordered (pattern, key) pairs; the first pattern matching a line wins.
    # ION_MODE and MS_TYPE accept an optional colon after the sub-tag: the
    # colon used to be mandatory, while getMsTypeFromString in this module
    # parses the same line without one ("MS_TYPE MS2").
    my @rules = (
        [ qr/AC\$MASS_SPECTROMETRY: ION_MODE:?(.*)/,              'ion_mode' ],                 # mandatory
        [ qr/AC\$MASS_SPECTROMETRY: MS_TYPE:?(.*)/,               'ms_type' ],                  # mandatory
        [ qr/AC\$MASS_SPECTROMETRY: COLLISION_ENERGY(.*)/,        'collision_energy' ],         # optional
        [ qr/AC\$MASS_SPECTROMETRY: COLLISION_GAS(.*)/,           'collision_gas' ],            # optional
        [ qr/AC\$MASS_SPECTROMETRY: DESOLVATION_GAS_FLOW(.*)/,    'desolvation_gas_flow' ],     # optional
        [ qr/AC\$MASS_SPECTROMETRY: DESOLVATION_TEMPERATURE(.*)/, 'desolvation_temperature' ],  # optional
        [ qr/AC\$MASS_SPECTROMETRY: IONIZATION_ENERGY(.*)/,       'ionization_energy' ],        # optional
        [ qr/AC\$MASS_SPECTROMETRY: LASER(.*)/,                   'laser' ],                    # optional
        [ qr/AC\$MASS_SPECTROMETRY: MATRIX(.*)/,                  'matrix' ],                   # optional
        [ qr/AC\$MASS_SPECTROMETRY: MASS_ACCURACY(.*)/,           'mass_accuracy' ],            # optional
        [ qr/AC\$MASS_SPECTROMETRY: REAGENT_GAS(.*)/,             'reagent_gas' ],              # optional
        [ qr/AC\$MASS_SPECTROMETRY: SCANNING(.*)/,                'scanning' ],                 # optional
    ) ;

    my %features = map { $_->[1] => undef } @rules ;
    my $matched  = 0 ;

    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    LINE:
    while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            next unless $field =~ $pattern ;
            $features{$key} = $1 ;
            $matched++ ;
            next LINE ;
        }
    }
    close($fh) ;

    # Empty the structure when nothing was found.
    %features = () if $matched == 0 ;
    return ( \%features ) ;
}
| 184 ## END of SUB | |
| 185 | |
| 186 =head2 METHOD get_solvents_data | |
| 187 | |
| 188 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 189 ## Input : $ms_file | |
| 190 ## Output : $features | |
| 191 ## Usage : my ( $features ) = get_solvents_data( $ms_file ) ; | |
| 192 | |
| 193 =cut | |
| 194 ## START of SUB | |
## List the chromatography solvents declared in a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : array ref with one string per "AC$CHROMATOGRAPHY: SOLVENT" line,
##          built as 'Solvent ' followed by the raw remainder of the line
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_solvents_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        next unless $field =~ /AC\$CHROMATOGRAPHY: SOLVENT(.*)/ ;
        push( @features, 'Solvent ' . $1 ) ;
    }
    close($fh) ;

    return ( \@features ) ;
}
| 215 ## END of SUB | |
| 216 | |
| 217 =head2 METHOD get_sample_data | |
| 218 | |
| 219 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 220 ## Input : $ms_file | |
| 221 ## Output : $features | |
| 222 ## Usage : my ( $features ) = get_sample_data( $ms_file ) ; | |
| 223 | |
| 224 =cut | |
| 225 ## START of SUB | |
## Read the SP$SAMPLE field from a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : hash ref { sample_type => raw text following "SP$SAMPLE" } taken
##          from the last matching line; an empty hash ref when no line matches
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_sample_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my %features = ( 'sample_type' => undef ) ;
    my $matched  = 0 ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        next unless $field =~ /SP\$SAMPLE(.*)/ ;
        $features{'sample_type'} = $1 ;
        $matched++ ;
    }
    close($fh) ;

    %features = () if $matched == 0 ;
    return ( \%features ) ;
}
| 250 ## END of SUB | |
| 251 | |
| 252 =head2 METHOD get_chromato_methods_data | |
| 253 | |
| 254 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 255 ## Input : $ms_file | |
| 256 ## Output : $features | |
| 257 ## Usage : my ( $features ) = get_chromato_methods_data( $ms_file ) ; | |
| 258 | |
| 259 =cut | |
| 260 ## START of SUB | |
## Read the AC$CHROMATOGRAPHY sub-fields from a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : hash ref keyed by capillary_voltage, column_name, column_temperature,
##          flow_gradient, flow_rate, retention_time (undef when a line is
##          absent); an empty hash ref when no sub-field is found
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_chromato_methods_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Ordered (pattern, key) pairs; the first pattern matching a line wins.
    my @rules = (
        [ qr/AC\$CHROMATOGRAPHY: CAPILLARY_VOLTAGE (.*)/,  'capillary_voltage' ],
        [ qr/AC\$CHROMATOGRAPHY: COLUMN_NAME (.*)/,        'column_name' ],
        [ qr/AC\$CHROMATOGRAPHY: COLUMN_TEMPERATURE (.*)/, 'column_temperature' ],
        [ qr/AC\$CHROMATOGRAPHY: FLOW_GRADIENT (.*)/,      'flow_gradient' ],
        [ qr/AC\$CHROMATOGRAPHY: FLOW_RATE (.*)/,          'flow_rate' ],
        [ qr/AC\$CHROMATOGRAPHY: RETENTION_TIME (.*)/,     'retention_time' ],
    ) ;

    my %features = map { $_->[1] => undef } @rules ;
    my $matched  = 0 ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    LINE:
    while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            next unless $field =~ $pattern ;
            $features{$key} = $1 ;
            $matched++ ;
            next LINE ;
        }
    }
    close($fh) ;

    %features = () if $matched == 0 ;
    return ( \%features ) ;
}
| 296 ## END of SUB | |
| 297 | |
| 298 =head2 METHOD get_analytical_conditions_data | |
| 299 | |
| 300 ## Description : permet de recuperer tous les champs d'un object massbank .. for massbank version < 2.0 | |
| 301 ## Input : $ms_file | |
| 302 ## Output : $features | |
| 303 ## Usage : my ( $features ) = get_analytical_conditions_data( $ms_file ) ; | |
| 304 | |
| 305 =cut | |
| 306 ## START of SUB | |
## Read the AC$ANALYTICAL_CONDITION sub-fields (MassBank format < 2.0) from a
## record file and split them into chromatography and mass-spectrometry sets.
## Input  : $ms_file (path of the record file)
## Output : list of two hash refs, ( chromatography, mass spectrometry ).
##          Each holds one key per known sub-field (undef when absent) and is
##          emptied when none of its sub-fields was found.
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_analytical_conditions_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Ordered (sub-tag, group, key) triples; the first sub-tag matching a line
    # wins. Every value is captured after "<SUBTAG> ", so no leading blank is
    # kept -- COLUMN_TEMPERATURE used to be the one exception (typo "( .*)").
    my @fields = (
        ## chromato_method
        [ 'CAPILLARY_VOLTAGE',       'chrom', 'capillary_voltage' ],
        [ 'COLUMN_NAME',             'chrom', 'column_name' ],
        [ 'COLUMN_TEMPERATURE',      'chrom', 'column_temperature' ],
        [ 'FLOW_GRADIENT',           'chrom', 'flow_gradient' ],
        [ 'FLOW_RATE',               'chrom', 'flow_rate' ],
        [ 'RETENTION_TIME',          'chrom', 'retention_time' ],
        ## ms_method
        [ 'ION_MODE',                'ms',    'ion_mode' ],                 # mandatory
        [ 'MS_TYPE',                 'ms',    'ms_type' ],                  # mandatory
        [ 'COLLISION_ENERGY',        'ms',    'collision_energy' ],         # optional
        [ 'COLLISION_GAS',           'ms',    'collision_gas' ],            # optional
        [ 'DESOLVATION_GAS_FLOW',    'ms',    'desolvation_gas_flow' ],     # optional
        [ 'DESOLVATION_TEMPERATURE', 'ms',    'desolvation_temperature' ],  # optional
        [ 'IONIZATION_ENERGY',       'ms',    'ionization_energy' ],        # optional
        [ 'LASER',                   'ms',    'laser' ],                    # optional
        [ 'MATRIX',                  'ms',    'matrix' ],                   # optional
        [ 'MASS_ACCURACY',           'ms',    'mass_accuracy' ],            # optional
        [ 'REAGENT_GAS',             'ms',    'reagent_gas' ],              # optional
        [ 'SCANNING',                'ms',    'scanning' ],                 # optional
    ) ;

    my %features = ( 'chrom' => {}, 'ms' => {} ) ;
    my %matched  = ( 'chrom' => 0,  'ms' => 0 ) ;
    my @rules ;
    foreach my $field_def (@fields) {
        my ( $tag, $group, $key ) = @{$field_def} ;
        $features{$group}{$key} = undef ;
        push( @rules, [ qr/AC\$ANALYTICAL_CONDITION: \Q$tag\E (.*)/, $group, $key ] ) ;
    }

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    LINE:
    while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $group, $key ) = @{$rule} ;
            next unless $field =~ $pattern ;
            $features{$group}{$key} = $1 ;
            $matched{$group}++ ;
            next LINE ;
        }
    }
    close($fh) ;

    foreach my $group ( 'ms', 'chrom' ) {
        %{ $features{$group} } = () if $matched{$group} == 0 ;
    }
    return ( $features{'chrom'}, $features{'ms'} ) ;
}
| 371 ## END of SUB | |
| 372 | |
| 373 =head2 METHOD get_spectrums_data | |
| 374 | |
| 375 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 376 ## Input : $ms_file | |
| 377 ## Output : $features | |
| 378 ## Usage : my ( $features ) = get_spectrums_data( $ms_file ) ; | |
| 379 | |
| 380 =cut | |
| 381 ## START of SUB | |
## Read the focused-ion fields and the peak count from a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : hash ref keyed by ion_type, precursor_mz, precursor_type (raw text
##          following the sub-tag, leading blank kept) and num_peaks (text after
##          "PK$NUM_PEAK: "); undef for absent lines, empty hash ref when
##          nothing is found
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_spectrums_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Ordered (pattern, key) pairs; the first pattern matching a line wins.
    my @rules = (
        [ qr/MS\$FOCUSED_ION: ION_TYPE(.*)/,        'ion_type' ],
        [ qr/MS\$FOCUSED_ION: PRECURSOR_M\/Z(.*)/,  'precursor_mz' ],
        [ qr/MS\$FOCUSED_ION: PRECURSOR_TYPE(.*)/,  'precursor_type' ],
        [ qr/PK\$NUM_PEAK: (.*)/,                   'num_peaks' ],
    ) ;

    my %features = map { $_->[1] => undef } @rules ;
    my $matched  = 0 ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    LINE:
    while ( my $field = <$fh> ) {
        chomp $field ;
        foreach my $rule (@rules) {
            my ( $pattern, $key ) = @{$rule} ;
            next unless $field =~ $pattern ;
            $features{$key} = $1 ;
            $matched++ ;
            next LINE ;
        }
    }
    close($fh) ;

    %features = () if $matched == 0 ;
    return ( \%features ) ;
}
| 412 ## END of SUB | |
| 413 | |
| 414 =head2 METHOD get_peaks_data | |
| 415 | |
| 416 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 417 ## Input : $ms_file | |
| 418 ## Output : $features | |
| 419 ## Usage : my ( $features ) = get_peaks_data( $ms_file ) ; | |
| 420 | |
| 421 =cut | |
| 422 ## START of SUB | |
## Read the peak table of a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : array ref of hash refs { mz, intensity, relative_intensity }, one
##          per peak line found after the "PK$PEAK: m/z int. rel.int." header
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_peaks_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    # Peak line layouts, tried in this order; each captures m/z, intensity and
    # relative intensity. Intensities in scientific notation (e.g. 1.5e3) are
    # not recognised by these patterns.
    my @peak_patterns = (
        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,            # integer m/z, integer intensity
        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,  # decimal m/z, decimal intensity
        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,       # decimal m/z, integer intensity
        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,       # integer m/z, decimal intensity
    ) ;

    my @features ;
    my $in_peak_area = 0 ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peak_area = 1 ;
            next ;
        }
        next unless $in_peak_area ;

        foreach my $pattern (@peak_patterns) {
            next unless $field =~ $pattern ;
            push( @features, { 'mz' => $1, 'intensity' => $2, 'relative_intensity' => $3 } ) ;
            last ;
        }
    }
    close($fh) ;

    return ( \@features ) ;
}
| 466 ## END of SUB | |
| 467 | |
| 468 =head2 METHOD getPeaksFromString | |
| 469 | |
| 470 ## Description : permet de recuperer la data peaks d'un record handler massbank | |
| 471 ## Input : $record | |
| 472 ## Output : $features | |
| 473 ## Usage : my ( $features ) = getPeaksFromString( $record ) ; | |
| 474 | |
| 475 =cut | |
| 476 ## START of SUB | |
## Read the peak table of a MassBank record held in a string.
## Input  : $record (the whole record text, lines separated by "\n")
## Output : array ref of hash refs { mz, intensity, relative_intensity }, one
##          per peak line found after the "PK$PEAK: m/z int. rel.int." header
## Raises : croaks when $record is undefined
sub getPeaksFromString {
    my $self = shift ;
    my ( $record ) = @_ ;

    if ( !defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    # Plain-number peak line layouts, tried in this order; each captures m/z,
    # intensity and relative intensity.
    my @peak_patterns = (
        qr/\s+(\d+)\s+(\d+)\s+(\d+)/,            # integer m/z, integer intensity
        qr/\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)/,  # decimal m/z, decimal intensity
        qr/\s+(\d+\.\d+)\s+(\d+)\s+(\d+)/,       # decimal m/z, integer intensity
        qr/\s+(\d+)\s+(\d+\.\d+)\s+(\d+)/,       # integer m/z, decimal intensity
    ) ;
    # Decimal m/z with an intensity in scientific notation (x.xxxeN). The
    # exponent may have several digits and an optional '+' sign.
    my $exponent_pattern = qr/\s+(\d+\.\d+)\s+(\d+\.\d+)[eE]\+?(\d+)\s+(\d+)/ ;

    my @features ;
    my $in_peak_area = 0 ;

    FIELD:
    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /PK\$PEAK: m\/z int\. rel\.int\./ ) {
            $in_peak_area = 1 ;
            next FIELD ;
        }
        next FIELD unless $in_peak_area ;

        foreach my $pattern (@peak_patterns) {
            next unless $field =~ $pattern ;
            push( @features, { 'mz' => $1, 'intensity' => $2, 'relative_intensity' => $3 } ) ;
            next FIELD ;
        }

        if ( $field =~ $exponent_pattern ) {
            my ( $mz, $mantissa, $exponent, $relative ) = ( $1, $2, $3, $4 ) ;
            # mantissa x 10^exponent (this was computed as mantissa * (10 * exponent)).
            my $intensity = $mantissa * ( 10**$exponent ) ;
            push( @features, { 'mz' => $mz, 'intensity' => $intensity, 'relative_intensity' => $relative } ) ;
        }
    }

    return ( \@features ) ;
}
| 523 ## END of SUB | |
| 524 | |
| 525 =head2 METHOD getIdFromString | |
| 526 | |
| 527 ## Description : get the accession id of massbank record | |
| 528 ## Input : $record | |
| 529 ## Output : $id | |
| 530 ## Usage : my ( $id ) = getIdFromString ( $record ) ; | |
| 531 | |
| 532 =cut | |
| 533 ## START of SUB | |
## Extract the accession id from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "ACCESSION:" on the last matching line, or undef
## Raises : croaks when $record is undefined
sub getIdFromString {
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless ( defined $record ) ;

    my $id ;
    for my $line ( split( /\n/, $record ) ) {
        # Later matches overwrite earlier ones.
        $id = $1 if $line =~ /ACCESSION:\s+(.+)/ ;
    }
    return $id ;
}
| 555 ### END of SUB | |
| 556 | |
| 557 | |
| 558 | |
| 559 =head2 METHOD getInstrumentTypeFromString | |
| 560 | |
| 561 ## Description : get the instrument type of massbank record | |
| 562 ## Input : $record | |
| 563 ## Output : $instrumentType | |
| 564 ## Usage : my ( $instrumentType ) = getInstrumentTypeFromString ( $record ) ; | |
| 565 | |
| 566 =cut | |
| 567 ## START of SUB | |
## Extract the instrument type from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "INSTRUMENT_TYPE:" on the last matching line, or undef
## Raises : croaks when $record is undefined
sub getInstrumentTypeFromString {
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        if ( !defined $record ) ;

    # Collect every match in line order, then keep the last one.
    my @found = map { /INSTRUMENT_TYPE:\s+(.+)/ ? ($1) : () } split( /\n/, $record ) ;
    my $instrumentType = @found ? $found[-1] : undef ;

    return $instrumentType ;
}
| 589 ### END of SUB | |
| 590 | |
| 591 =head2 METHOD getFormulaFromString | |
| 592 | |
| 593 ## Description : get the elementar formula of massbank record | |
| 594 ## Input : $record | |
| 595 ## Output : $formula | |
| 596 ## Usage : my ( $formula ) = getFormulaFromString ( $record ) ; | |
| 597 | |
| 598 =cut | |
| 599 ## START of SUB | |
## Extract the elemental formula from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "CH$FORMULA:" on the last matching line, or undef
## Raises : croaks when $record is undefined
sub getFormulaFromString {
    my ( $self, $record ) = @_ ;

    unless ( defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my $formula ;
    my @lines = split( /\n/, $record ) ;
    for my $index ( 0 .. $#lines ) {
        next unless $lines[$index] =~ /CH\$FORMULA:\s+(.+)/ ;
        $formula = $1 ;    # later matches overwrite earlier ones
    }
    return $formula ;
}
| 621 ### END of SUB | |
| 622 | |
| 623 =head2 METHOD getInchiFromString | |
| 624 | |
| 625 ## Description : get the IUPAC InCHi of massbank record | |
| 626 ## Input : $record | |
| 627 ## Output : $inchi | |
| 628 ## Usage : my ( $inchi ) = getInchiFromString ( $record ) ; | |
| 629 | |
| 630 =cut | |
| 631 ## START of SUB | |
## Extract the CH$IUPAC value (InChI) from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "CH$IUPAC:" on the last matching line, or undef
## Raises : croaks when $record is undefined
sub getInchiFromString {
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        unless defined $record ;

    my $inchi ;
    foreach my $line ( split( /\n/, $record ) ) {
        if ( $line =~ /CH\$IUPAC:\s+(.+)/ ) {
            $inchi = $1 ;    # later matches overwrite earlier ones
        }
    }
    return $inchi ;
}
| 653 ### END of SUB | |
| 654 | |
| 655 =head2 METHOD getExactMzFromString | |
| 656 | |
| 657 ## Description : get the exact mass of massbank record | |
| 658 ## Input : $record | |
| 659 ## Output : $exactMass | |
| 660 ## Usage : my ( $exactMass ) = getExactMzFromString ( $record ) ; | |
| 661 | |
| 662 =cut | |
| 663 ## START of SUB | |
## Extract the exact mass from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "CH$EXACT_MASS:" on the last matching line, or undef
## Raises : croaks when $record is undefined
sub getExactMzFromString {
    my ( $self, $record ) = @_ ;

    if ( !defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    # Keep only the matching lines' captures; the last one is returned.
    my @masses ;
    for my $line ( split( /\n/, $record ) ) {
        push( @masses, $1 ) if $line =~ /CH\$EXACT_MASS:\s+(.+)/ ;
    }
    my $exactMass = @masses ? $masses[-1] : undef ;

    return $exactMass ;
}
| 685 ### END of SUB | |
| 686 | |
| 687 | |
| 688 =head2 METHOD getPrecursorTypeFromString | |
| 689 | |
| 690 ## Description : get the precursor type of massbank record | |
| 691 ## Input : $record | |
| 692 ## Output : $precursorType | |
| 693 ## Usage : my ( $precursorType ) = getPrecursorTypeFromString ( $record ) ; | |
| 694 | |
| 695 =cut | |
| 696 ## START of SUB | |
## Determine the precursor type of a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : in order of preference
##            1. raw text following "PRECURSOR_TYPE" (leading blank kept),
##            2. raw text following "ION_TYPE" (leading blank kept),
##            3. last ';'-separated item of RECORD_TITLE, whitespace removed,
##            4. the string 'NA'.
## Raises : croaks when $record is undefined
sub getPrecursorTypeFromString {
    my $self = shift ;
    my ( $record ) = @_ ;

    if ( !defined $record ) {
        croak "Can't work with a undef / none existing massbank handler\n" ;
    }

    my ( $precursor_field, $ion_field, $title_suffix ) ;

    foreach my $field ( split( /\n/, $record ) ) {
        if ( $field =~ /RECORD_TITLE:\s+(.+)/ ) {
            my @title = split( /;/, $1 ) ;
            $title_suffix = $title[-1] ;
            # A title made only of ';' yields no item at all: leave undef
            # instead of running s/// on an undefined value.
            $title_suffix =~ s/\s//g if defined $title_suffix ;
        }
        # The scan stops at the first PRECURSOR_TYPE or ION_TYPE line.
        # NOTE(review): an ION_TYPE line placed before PRECURSOR_TYPE therefore
        # hides the latter -- confirm this is the intended priority.
        if ( $field =~ /PRECURSOR_TYPE(.+)/ ) {
            $precursor_field = $1 ;
            last ;
        }
        if ( $field =~ /ION_TYPE(.+)/ ) {
            $ion_field = $1 ;
            last ;
        }
    }

    ## manage undef precursor/ion type field
    return $precursor_field if defined $precursor_field ;
    return $ion_field       if defined $ion_field ;
    return $title_suffix    if defined $title_suffix ;
    return 'NA' ;
}
| 750 ### END of SUB | |
| 751 | |
| 752 =head2 METHOD getMsTypeFromString | |
| 753 | |
| 754 ## Description : get the MS type of massbank record | |
| 755 ## Input : $record | |
| 756 ## Output : $msType | |
| 757 ## Usage : my ( $msType ) = getMsTypeFromString ( $record ) ; | |
| 758 | |
| 759 =cut | |
| 760 ## START of SUB | |
## Extract the MS type from a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : text following "AC$MASS_SPECTROMETRY: MS_TYPE" on the last matching
##          line, or undef
## Raises : croaks when $record is undefined
sub getMsTypeFromString {
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank handler\n"
        if not defined $record ;

    my $msType ;
    for my $line ( split( /\n/, $record ) ) {
        next if $line !~ /AC\$MASS_SPECTROMETRY:\s+MS_TYPE\s+(.+)/ ;
        $msType = $1 ;    # later matches overwrite earlier ones
    }
    return $msType ;
}
| 782 ### END of SUB | |
| 783 | |
| 784 =head2 METHOD getChemNamesFromString | |
| 785 | |
| 786 ## Description : get list of names of a massbank record | |
| 787 ## Input : $record | |
| 788 ## Output : $names | |
| 789 ## Usage : my ( $names ) = getChemNamesFromString( $record ) ; | |
| 790 | |
| 791 =cut | |
| 792 ## START of SUB | |
## List the chemical names of a MassBank record string.
## Input  : $record (record text, lines separated by "\n")
## Output : array ref holding the text after "CH$NAME: " of every matching
##          line, in record order
## Raises : croaks when $record is undefined
sub getChemNamesFromString {
    my ( $self, $record ) = @_ ;

    croak "Can't work with a undef / none existing massbank record (string)\n"
        unless defined $record ;

    # One entry per CH$NAME line; other lines contribute nothing.
    my @names = map { /CH\$NAME: (.*)/ ? ($1) : () } split( /\n/, $record ) ;

    return \@names ;
}
| 812 ## END of SUB | |
| 813 | |
| 814 | |
| 815 | |
| 816 | |
| 817 | |
| 818 =head2 METHOD getMassBankHandler | |
| 819 | |
| 820 ## Description : get a massbank handler from a file | |
| 821 ## Input : $record | |
| 822 ## Output : $massbankHandler | |
| 823 ## Usage : my ( $massbankHandler ) = getMassBankHandler ( $record ) ; | |
| 824 | |
| 825 =cut | |
| 826 ## START of SUB | |
## Placeholder: build a MassBank handler from a record.
## Input  : $record (currently unused)
## Output : always undef -- the body is still to be written
sub getMassBankHandler {
    my ( $self, $record ) = @_ ;

    ## TODO...
    my $massbankHandler = undef ;

    return $massbankHandler ;
}
| 837 ### END of SUB | |
| 838 | |
| 839 =head2 METHOD get_annotations_data | |
| 840 | |
| 841 ## Description : permet de recuperer tous les champs d'un object massbank | |
| 842 ## Input : $ms_file | |
| 843 ## Output : $features | |
| 844 ## Usage : my ( $features ) = get_annotations_data( $ms_file ) ; | |
| 845 | |
| 846 =cut | |
| 847 ## START of SUB | |
## List the PK$ANNOTATION lines of a MassBank record file.
## Input  : $ms_file (path of the record file)
## Output : array ref holding the raw text following "PK$ANNOTATION:" of
##          every matching line (leading blank kept)
## Raises : croaks when $ms_file is undefined or does not exist,
##          dies when the file cannot be opened
sub get_annotations_data {
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !defined $ms_file or !-e $ms_file ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my @features ;

    # Three-argument open with a lexical handle (no global MS handle).
    open( my $fh, '<', $ms_file ) or die "Cant' read the file $ms_file\n" ;
    while ( my $field = <$fh> ) {
        chomp $field ;
        push( @features, $1 ) if $field =~ /PK\$ANNOTATION:(.*)/ ;
    }
    close($fh) ;

    return ( \@features ) ;
}
| 869 ## END of SUB | |
| 870 | |
| 871 =head2 METHOD get_links_data | |
| 872 | |
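| 873 ## Description : collects the CAS, KEGG and PUBCHEM CID identifiers declared in the CH$LINK fields of a MassBank record file | |
| 874 ## Input : $ms_file | |
| 875 ## Output : $features (hash ref with keys CAS, KEGG and PUBCHEM, each holding an array ref) | |
| 876 ## Usage : my ( $features ) = get_links_data( $ms_file ) ; | |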
| 877 | |
| 878 =cut | |
| 879 ## START of SUB | |
sub get_links_data {
    ## Collect external database identifiers declared in the CH$LINK fields
    ## of a MassBank record file.
    ## Input  : $ms_file - path to a MassBank record file
    ## Output : hash ref with exactly three keys, 'CAS', 'KEGG' and 'PUBCHEM';
    ##          each value is an array ref (possibly empty) of the identifiers
    ##          found, in file order
    ## Croaks : when $ms_file is undef or does not exist
    ## Dies   : when the file exists but cannot be opened
    my $self = shift ;
    my ( $ms_file ) = @_ ;

    if ( !( ( defined $ms_file ) and ( -e $ms_file ) ) ) {
        croak "Can't work with a undef / none existing massbank file\n" ;
    }

    my %features = ( 'CAS' => [], 'KEGG' => [], 'PUBCHEM' => [] ) ;

    ## Three-argument open: the two-argument form strips leading/trailing
    ## whitespace from the file name, so a path accepted by -e could still
    ## fail to open. The lexical handle avoids sharing a global MS handle.
    open( my $ms_fh, '<', $ms_file ) or die "Cant' read the file $ms_file: $!\n" ;
    while ( my $field = <$ms_fh> ) {
        chomp $field ;
        $field =~ s/\r\z// ;    # CRLF files: do not keep the carriage return in the capture

        ## Other CH$LINK databases are deliberately ignored for now.
        if    ( $field =~ /CH\$LINK: CAS (.*)/ )         { push( @{ $features{'CAS'} },     $1 ) ; }
        elsif ( $field =~ /CH\$LINK: KEGG (.*)/ )        { push( @{ $features{'KEGG'} },    $1 ) ; }
        elsif ( $field =~ /CH\$LINK: PUBCHEM CID (.*)/ ) { push( @{ $features{'PUBCHEM'} }, $1 ) ; }
    }
    close($ms_fh) ;

    return(\%features) ;
}
| 914 ## END of SUB | |
| 915 | |
| 916 =head2 METHOD get_ms_record_links_data | |
| 917 | |
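| 918 ## Description : derives the record accession from the file name and identifies its source database (MASSBANK or RESPECT) from the accession prefix | |
| 919 ## Input : $ms_file | |
| 920 ## Output : $db (hash ref with keys 'accession' and 'name'; empty when no accession can be read from the file name) | |
| 921 ## Usage : my ( $db ) = get_ms_record_links_data( $ms_file ) ; | |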
| 922 | |
| 923 =cut | |
| 924 ## START of SUB | |
sub get_ms_record_links_data {
    ## Derive the record accession from a file name and name its source database.
    ## Input  : $ms_file - path or file name of a record (a ".txt" suffix is removed)
    ## Output : hash ref { accession => ..., name => 'MASSBANK' | 'RESPECT' | undef },
    ##          or an empty hash ref when no accession can be read from the name
    ## Warns  : (carp) when the accession prefix belongs to neither database
    my ( $self, $ms_file ) = @_ ;

    ## Accession prefix -> owning database (internal reference for MASSBANK and RESPECT)
    my %source_of = (
        ( map { $_ => 'RESPECT' } qw( PS PT PM ) ),
        ( map { $_ => 'MASSBANK' } qw(
            TUE GLS AU MSJ ML FIO UF CO UO TT OUF MCH NU KNA MT
            CE KO KZ JEL JP PR BML CA TY PB FU EA UT BSU WA
        ) ),
    ) ;

    return {} unless $ms_file ;

    ## Keep only the record id (0001-PS0002 => PS0002 or BJ0045 => BJ0045)
    my $filename = basename("$ms_file", ".txt");
    my ( $accession ) = $filename =~ /(\w+)$/ ;
    return {} unless defined $accession ;

    my %db = ( 'accession' => $accession, 'name' => undef ) ;

    if ( $db{'accession'} =~ /(\D+)(\d+)/ ) {
        my $key = $1 ;
        if ( exists $source_of{$key} ) {
            $db{'name'} = $source_of{$key} ;
        }
        else {
            carp "The following key ($key) for $db{'accession'} has an unknown reference (not a Massbank or ReSpect source)\n" ;
        }
    }

    return \%db ;
}
| 956 ## END of SUB | |
| 957 | |
| 958 | |
| 959 1 ; | |
| 960 | |
| 961 | |
| 962 __END__ | |
| 963 | |
| 964 =head1 SUPPORT | |
| 965 | |
| 966 You can find documentation for this module with the perldoc command. | |
| 967 | |
| 968 perldoc parser::chem::massbank.pm | |
| 969 | |
| 970 =head1 Exports | |
| 971 | |
| 972 =over 4 | |
| 973 | |
| 974 =item :ALL is ... | |
| 975 | |
| 976 =back | |
| 977 | |
| 978 =head1 AUTHOR | |
| 979 | |
| 980 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
| 981 | |
| 982 =head1 LICENSE | |
| 983 | |
| 984 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
| 985 | |
| 986 =head1 VERSION | |
| 987 | |
| 988 version 1 : 25 / 06 / 2013 | |
| 989 | |
| 990 version 2 : ?? | |
| 991 | |
| 992 =cut |
