Mercurial > repos > fgiacomoni > downloader_bank_hmdb
diff downloader_bank_hmdb.pl @ 0:7c9269bded0e draft
Init repository for [downloader_bank_hmdb]
author | fgiacomoni |
---|---|
date | Tue, 14 Jan 2020 05:21:23 -0500 |
parents | |
children | be504ccbc41c |
line wrap: on
line diff
#!perl

## script  : downloader_bank_hmdb.pl
##
## Workflow implemented below:
##   1. read the command line options and the *.cfg file located next to the script ;
##   2. resolve the bank URL from the conf and ask the remote resource for its version ;
##   3. download and uncompress the bank into <LOCALBANK_PATH>/<bank>_<version>/
##      when no usable local copy is present ;
##   4. reuse the cached <bank>_<version>.tabular reference file when it exists,
##      otherwise build it from the downloaded bank file, and write a copy to -outTab.
#=============================================================================
#                              Included modules and versions
#=============================================================================
## Perl modules
use strict ;
use warnings ;
use Carp qw (cluck croak carp) ;

use Data::Dumper ;
use Getopt::Long ;
use File::Basename ;
use FindBin ;                     ## Allows you to locate the directory of original perl script

## Specific Perl Modules (PFEM)
use lib $FindBin::Bin.'/lib' ;
my $binPath = $FindBin::Bin ;

## Dedicate Perl Modules (Home made...)
use hmdb_api qw( :ALL ) ;
use utils qw( :ALL ) ;
use conf qw( :ALL ) ;
use csv qw( :ALL ) ;

## Initialized values
my $OptHelp ;
my $VERBOSE = 3 ;
my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
GetOptions (
    "h"          => \$OptHelp,     # HELP
    "bank|b:s"   => \$bankName,    # bank name to get the right url
    "format|f:s" => \$format,      # output format
    "outTab:s"   => \$outTab,      # tabular output
    "outJson:s"  => \$outJson,     # Json output
    "verbose:i"  => \$VERBOSE,     # level of script verbose [should be 1 or 3]
) ;

## if you put the option -help or -h function help is started
help() if ( defined($OptHelp) ) ;

#=============================================================================
#                                MAIN SCRIPT
#=============================================================================

# get conf informations (when several *.cfg files are present, the last one read wins)
my ( $CONF ) = ( undef ) ;
foreach my $conf ( <$binPath/*.cfg> ) {
    my $oConf = conf::new() ;
    $CONF = $oConf->as_conf($conf) ;
}

# Without a bank name nothing can be resolved: report the reason BEFORE calling
# help(), because help() exits the program and anything placed after it never runs.
if ( !defined $bankName ) {
    carp "No bank name and format are defined - Please set one" ;
    help() ;
}

# An absent conf would otherwise be autovivified below and reported as a wrong bank name.
if ( !defined $CONF ) {
    croak "[ERROR] no configuration file (*.cfg) could be loaded from $binPath -- database downloading stopped" ;
}

my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;

print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;

# get information from conf
if ($CONF->{$bankName.'_URL'}) {
    $bankUrl = $CONF->{$bankName.'_URL'} ;
    # get version of the http resource
    my $oUtils = utils->new() ;
    ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;

    # an undetected version is reported by the dedicated croak further down
    print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
}
else {
    croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
}

# archive format of the remote resource (compared to 'ZIP' / 'GZ' below)
if ($CONF->{$bankName.'_HTTP_FORMAT'}) {
    $bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ;
}
else {
    croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
}

print "** Manage bank environment\n" if $VERBOSE > 1 ;
my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
if ( !-d $bankDir ) {
    print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
    # the second -d test tolerates a directory created concurrently by another run
    mkdir $bankDir or -d $bankDir or croak "[ERROR] the bank dir ($bankDir) cannot be created: $! -- database download stopped" ;
}

print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;

if ( !( ( defined $bankUrl ) and ( defined $bankVersion ) ) ) {
    croak "the given bank url ($bankUrl) and the detected version ar undef -- database download stopped" ;
}

# Every file of one bank version lives in <bankDir>/<bank>_<version>/
my $bankStem     = $bankName.'_'.$bankVersion ;
my $bankFileDir  = $bankDir.'/'.$bankStem ;
# downloaded archive
my $bankFilePath = $bankFileDir.'/'.$bankStem.'.'.lc($bankSuffixe) ;
# final files (tabular/json) ; the json path is reserved for the not yet implemented json output
my $bankOutputTabularFile = $bankFileDir.'/'.$bankStem.'.tabular' ;
my $bankOutputJsonFile    = $bankFileDir.'/'.$bankStem.'.json' ;
# uncompressed bank file
my $bankFile     = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;

print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;

# The bank is usable locally only when its directory holds either the cached tabular
# reference or the raw bank file. Testing the directory alone is not enough: the raw
# file is removed at the end of a run, so a directory left without a tabular file
# would block every later download.
my $bankFileExist = 'FALSE' ;
if ( ( -d $bankFileDir ) and ( ( -e $bankOutputTabularFile ) or ( -e $bankFile ) ) ) {
    $bankFileExist = 'TRUE' ;
}

# download the bank if does not exist !
if ( $bankFileExist eq 'FALSE' ) {

    print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
    if (!-d $bankFileDir) {
        print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
        mkdir $bankFileDir ;    # outcome is verified by the -d test just below
    }

    if (-d $bankFileDir) {
        print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
        print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
        my $oDownloader = utils->new() ;
        # returned value is not used afterwards ; call kept in list context as before
        my ($fileZip) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ;

        print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;

        my $oUnzip = utils->new() ;
        # if archive is a zip
        if ($bankSuffixe eq 'ZIP') {
            $oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
            $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
        }
        # elsif archive is a gz
        elsif ($bankSuffixe eq 'GZ') {
            $oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
            $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
        }
        else {
            # nothing is uncompressed for any other format: say so instead of staying silent
            carp "[WARN] the archive format ($bankSuffixe) is not managed (ZIP or GZ expected) -- the downloaded file is left as is" ;
        }
    }
    else {
        croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
    }
}
else {
    print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
    print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ;
}

## the final tabular file already exists - only a copy is needed
if (-e $bankOutputTabularFile) {
    # the cached reference can only be delivered through -outTab
    if ( !defined $outTab ) {
        croak "[ERROR] no tabular output file is defined (-outTab) -- the ref file $bankOutputTabularFile cannot be copied\n" ;
    }
    # copy the ref file into the user history/session
    my $ocsv = csv->new( ) ;
    my $csv = $ocsv->get_csv_object("\t") ;
    my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
    $ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
    print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
}
## the final tabular file does not exists - need to be created from xml
else {

    ## Build a HASH with all metabolites from downloaded xml
    my ($handler, $metabolites, $nbEntries) = (undef, undef) ;
    my $dbFormat = $CONF->{$bankName.'_DB_FORMAT'} ;

    if (-e $bankFile) {
        my $oHandler = hmdb_api->new ;

        # in case the download file is in XML or Metabocard or SDF...
        if ($dbFormat eq 'XML') {
            ($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
            print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
        }
        elsif ($dbFormat eq 'CARD') {
            ($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
            ($metabolites) = $oHandler->cowmetdb_hash($handler) ;
            # NOTE(review): $nbEntries is dereferenced here, so cowmetdb_handle presumably returns a scalar ref -- confirm
            print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
        }
        else {
            #TODO...
        }
    }
    else {
        print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
    }

    ## Write outputs !
    print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;

    # two distinct failures, two distinct messages
    if ( !defined $metabolites ) {
        croak "No metabolites are extracted from the $bankName bank file\n" ;
    }
    if ( !defined $format ) {
        croak "[ERROR] no output format is defined (-format [tabular|json]) -- nothing can be written\n" ;
    }

    ## Generation of M+H and M-H masses
    my $ometmz = hmdb_api->new ;
    my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($dbFormat, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;

#   print Dumper $completedMetabolites ;

    ## tabular output
    if ( ( $format eq 'tabular') and (defined $outTab) ) {
        print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
#       print Dumper $metabolites ;
        # sort metabolites
        my $omet = hmdb_api->new ;
        my $sortedMetabolites = undef ;

        if ($dbFormat eq 'XML') {
            $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
        }
        elsif ($dbFormat eq 'CARD') {
            $sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
        }

#       print Dumper $sortedMetabolites ;
        my $ocsv = csv->new( ) ;
        my $csv = $ocsv->get_csv_object("\t") ;
        # create the ref file for ./bank repo
        $ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
        # generate also a copy for user history
        $ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
    }
    elsif ( defined $outJson ) {
        #TODO... json export is not implemented yet: warn instead of ending silently
        carp "[WARN] the json output is not implemented yet -- no output file is written" ;
    }
    else {
        carp "[WARN] no output file matches the asked format ($format) -- please check -outTab / -outJson" ;
    }

    ## Clean local envt
    unlink $bankFile if (-e $bankFile) ;
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
### END of main script


#====================================================================================
# Help subroutine called with -h option (and when no bank name is given)
# number of arguments : 0
# Argument(s)         :
# Return              : none - prints the usage on STDERR then exits with status 1
#====================================================================================
sub help {
    print STDERR "
downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni\@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :
        downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]

        ";
    exit(1);
}

## END of script - F Giacomoni

__END__

=head1 NAME

 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.

=head1 USAGE

 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json] -outTab [tabular file name]
 or downloader_bank_hmdb.pl -help

=head1 SYNOPSIS

This script exports a specific tissue/matrix bank from the HMDB source.

=head1 DESCRIPTION

This main program is a galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format.

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 21 / 11 / 2018

version 2 : ??

=cut