Mercurial > repos > fgiacomoni > downloader_bank_hmdb
view downloader_bank_hmdb.pl @ 1:4373f936111d draft
" master branch Updating with tag :CI_COMMIT_TAG - - Fxx"
author | fgiacomoni |
---|---|
date | Tue, 21 Jan 2020 16:09:45 -0500 |
parents | 7c9269bded0e |
children | be504ccbc41c |
line wrap: on
line source
#!perl

## script  : downloader_bank_hmdb.pl
##
## Purpose : resolve the URL of the requested HMDB bank from the local *.cfg
##           file, download and unpack the current version of that bank when
##           its versioned directory is not yet on disk, convert it into a
##           reference tabular file and copy that file to the user output.
#=============================================================================
#                              Included modules and versions
#=============================================================================
## Perl modules
use strict;
use warnings;
use Carp qw (cluck croak carp);

use Data::Dumper;
use Getopt::Long;
use File::Basename;
use FindBin;    ## Allows you to locate the directory of original perl script

## Specific Perl Modules (PFEM)
use lib $FindBin::Bin . '/lib';
my $binPath = $FindBin::Bin;

## Dedicated Perl Modules (Home made...)
use hmdb_api qw( :ALL );
use utils qw( :ALL );
use conf qw( :ALL );
use csv qw( :ALL );

## Initialized values
my $OptHelp;
my $VERBOSE = 3;
my ( $bankName, $format, $outTab, $outJson ) = ( undef, undef, undef, undef );

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
GetOptions(
    "help|h"     => \$OptHelp,     # HELP (both -h and -help are accepted)
    "bank|b:s"   => \$bankName,    # bank name to get the right url
    "format|f:s" => \$format,      # output format
    "outTab:s"   => \$outTab,      # tabular output
    "outJson:s"  => \$outJson,     # Json output
    "verbose:i"  => \$VERBOSE,     # level of script verbose [should be 1 or 3]
);

## if you put the option -help or -h function help is started
if ( defined $OptHelp ) {
    help();
}

#=============================================================================
#                                MAIN SCRIPT
#=============================================================================

# get conf informations: every *.cfg next to the script is parsed, the last
# one parsed is the one kept in $CONF
my $CONF = undef;
foreach my $confFile ( <$binPath/*.cfg> ) {
    my $oConf = conf::new();
    $CONF = $oConf->as_conf($confFile);
}

# MAIN SCRIPT :
if ( defined $bankName ) {

    # Without a loaded configuration the lookups below would only report a
    # misleading "unknown bank name" error, so report the real cause.
    if ( !defined $CONF ) {
        croak "no configuration file (*.cfg) can be loaded from $binPath -- database downloading stopped";
    }

    my ( $bankUrl, $bankVersion, $bankSuffixe ) = ( undef, undef, undef );
    my ( $bankOutputTabularFile, $bankOutputJsonFile ) = ( undef, undef );

    print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1;

    # get information from conf
    if ( $CONF->{ $bankName . '_URL' } ) {
        $bankUrl = $CONF->{ $bankName . '_URL' };

        # get version of the http resource
        my $oUtils = utils->new();
        ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl);
        print "\tThe current version is: $bankVersion\n" if ( $VERBOSE > 1 and defined $bankVersion );
    }
    else {
        croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped";
    }

    # manage if needed to download or not the bank (get or not the last version)
    if ( $CONF->{ $bankName . '_HTTP_FORMAT' } ) {
        $bankSuffixe = $CONF->{ $bankName . '_HTTP_FORMAT' };
    }
    else {
        croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped";
    }

    print "** Manage bank environment\n" if $VERBOSE > 1;
    my $bankDir = $binPath . '/' . $CONF->{'LOCALBANK_PATH'};

    if ( !-d $bankDir ) {
        print "\tdir creation of $bankDir\n" if $VERBOSE > 1;

        # the second -d test tolerates a directory created concurrently
        mkdir $bankDir
          or -d $bankDir
          or croak "[ERROR] the bank dir ($bankDir) cannot be created: $!";
    }

    print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1;

    if ( !defined $bankUrl or !defined $bankVersion ) {
        my $shownUrl     = defined $bankUrl     ? $bankUrl     : 'undef';
        my $shownVersion = defined $bankVersion ? $bankVersion : 'undef';
        croak "the given bank url ($shownUrl) or the detected version ($shownVersion) are undef -- database download stopped";
    }

    # One directory per bank version: <bank dir>/<bank>_<version>
    my $bankBaseName = $bankName . '_' . $bankVersion;
    my $bankFileDir  = $bankDir . '/' . $bankBaseName;

    # The bank is considered as already downloaded when its versioned
    # directory exists (tested before any creation below).
    my $bankFileExist = ( -d $bankFileDir ) ? 1 : 0;

    # Init the bank file name (downloaded archive)
    my $bankFilePath = $bankFileDir . '/' . $bankBaseName . '.' . lc($bankSuffixe);

    # For final files (tabular/json)
    $bankOutputTabularFile = $bankFileDir . '/' . $bankBaseName . '.tabular';
    $bankOutputJsonFile    = $bankFileDir . '/' . $bankBaseName . '.json';

    print "\tBuilding bank file dir: $bankFileDir\n"   if $VERBOSE > 1;
    print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1;

    # Unpacked bank file, as named in the configuration
    my $bankFileName = $CONF->{ $bankName . '_FILE_NAME' };
    my $bankFile     = $bankFileDir . '/' . $bankFileName;

    # download the bank if does not exist !
    if ( !$bankFileExist ) {
        print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1;

        if ( !-d $bankFileDir ) {
            print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1;
            mkdir $bankFileDir;    # failure is reported by the -d test below
        }

        if ( -d $bankFileDir ) {
            print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1;
            print "\tFrom...$bankUrl\n"                             if $VERBOSE > 1;
            my $oDownloader = utils->new();
            my ($fileZip) = $oDownloader->getHttpFile( $bankUrl, $bankFilePath );

            print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1;
            my $oUnzip = utils->new();

            # if archive is a zip
            if ( $bankSuffixe eq 'ZIP' ) {
                $oUnzip->unzipFile( $bankFilePath, $bankFile, $bankFileName );
                $oUnzip->cleanUnzip( $bankFilePath, $bankFile );
            }
            # elsif archive is a gz
            elsif ( $bankSuffixe eq 'GZ' ) {
                $oUnzip->gunzipFile( $bankFilePath, $bankFile, $bankFileName );
                $oUnzip->cleanUnzip( $bankFilePath, $bankFile );
            }
        }
        else {
            croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n";
        }
    }
    else {
        print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1;
        print "\t/!\\ ...in $bankFile /!\\\n"                                  if $VERBOSE > 1;
    }

    ## the final tabular file already exists - only a copy is needed
    if ( -e $bankOutputTabularFile ) {

        # copy the ref file into the user history/session, only when an
        # output path was given (same condition as in the generation branch)
        if ( defined $outTab ) {
            my $ocsv = csv->new();
            my $csv  = $ocsv->get_csv_object("\t");
            my ( $refEntries, $status ) = $ocsv->parse_allcsv_object( $csv, \$bankOutputTabularFile, 'y' );
            $ocsv->write_csv_from_arrays( $csv, $outTab, $refEntries );
            print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1;
        }
        else {
            print "\t/!\\ no tabular output (-outTab) is defined: the ref file $bankOutputTabularFile is not copied /!\\\n" if $VERBOSE > 1;
        }
    }
    ## the final tabular file does not exists - need to be created from xml
    else {
        ## Build a HASH with all metabolites from downloaded xml
        my ( $handler, $metabolites, $nbEntries ) = ( undef, undef, undef );
        my $dbFormat = $CONF->{ $bankName . '_DB_FORMAT' };

        if ( -e $bankFile ) {

            my $oHandler = hmdb_api->new;

            # in case the download file is in XML or Metabocard or SDF...
            if ( $dbFormat eq 'XML' ) {
                ( $metabolites, $nbEntries ) = $oHandler->getMetaboliteFeatures($bankFile);
                print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1;
            }
            elsif ( $dbFormat eq 'CARD' ) {
                ( $handler, $nbEntries ) = $oHandler->cowmetdb_handle($bankFile);
                ($metabolites) = $oHandler->cowmetdb_hash($handler);

                # NOTE(review): the count is dereferenced here, so
                # cowmetdb_handle presumably returns a scalar ref - confirm
                print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1;
            }
            else {
                #TODO...
            }
        }
        else {
            print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1;
        }

        ## Write outputs !
        print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1;

        if ( ( defined $metabolites ) and ( defined $format ) ) {

            ## Generation of M+H and M-H masses
            my $ometmz               = hmdb_api->new;
            my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz(
                $dbFormat, $metabolites,
                $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1
            );

            ## tabular output
            if ( ( $format eq 'tabular' ) and ( defined $outTab ) ) {
                print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1;

                # sort metabolites
                my $omet              = hmdb_api->new;
                my $sortedMetabolites = undef;

                if ( $dbFormat eq 'XML' ) {
                    $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites);
                }
                elsif ( $dbFormat eq 'CARD' ) {
                    $sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites);
                }

                my $ocsv = csv->new();
                my $csv  = $ocsv->get_csv_object("\t");

                # create the ref file for ./bank repo
                $ocsv->write_csv_from_arrays( $csv, $bankOutputTabularFile, $sortedMetabolites );

                # generate also a copy for user history
                $ocsv->write_csv_from_arrays( $csv, $outTab, $sortedMetabolites );
            }
            elsif ( ( defined $format ) and ( defined $outJson ) ) {
                #TODO...
            }
        }
        else {
            croak "No metabolites are extracted from the $bankName bank file\n";
        }

        ## Clean local envt
        unlink $bankFile if ( -e $bankFile );
    }

}    ## END IF defined $bankName
else {
    # help() exits, so the error has to be reported before calling it
    carp "No bank name and format are defined - Please set one";
    help();
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1;

### END of main script

#====================================================================================
# Help subroutine called with -h option
# number of arguments : 0
# Argument(s)         :
# Return              : never returns, the script exits with status 1
#====================================================================================
sub help {
    print STDERR "
downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni\@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :
        downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]

";
    exit(1);
}

## END of script - F Giacomoni

__END__

=head1 NAME

downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.

=head1 USAGE

 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json]
 or
 downloader_bank_hmdb.pl -help

=head1 SYNOPSIS

This script exports a specific tissue/matrix bank from the HMDB source.

=head1 DESCRIPTION

This main program is a galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format.

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 21 / 11 / 2018

version 2 : ??

=cut