view downloader_bank_hmdb.pl @ 1:4373f936111d draft

" master branch Updating with tag :CI_COMMIT_TAG - - Fxx"
author fgiacomoni
date Tue, 21 Jan 2020 16:09:45 -0500
parents 7c9269bded0e
children be504ccbc41c
line wrap: on
line source

#!perl

## script  : downloader_bank_hmdb.pl
#=============================================================================
#                              Included modules and versions
#=============================================================================
## Perl modules
use strict ;
use warnings ;
use Carp qw (cluck croak carp) ;

use Data::Dumper ;
use Getopt::Long ;
use File::Basename ;
use FindBin ; ## Allows you to locate the directory of original perl script

## Specific Perl Modules (PFEM)
use lib $FindBin::Bin.'/lib' ;
my $binPath = $FindBin::Bin ;

## Dedicated Perl Modules (home made...)
use hmdb_api qw( :ALL ) ;
use utils qw( :ALL ) ;
use conf qw( :ALL ) ;
use csv qw( :ALL ) ;

## Initialized values
my $OptHelp ;
my $VERBOSE = 3 ;
my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## Parse the command line.
## - the help flag is declared as "help|h" so that both -h and the documented
##   -help spelling are accepted (a bare "h" spec rejects -help as unknown) ;
## - when GetOptions reports an invalid command line (it returns false after
##   printing its own diagnostic) the usage is displayed and the script stops
##   instead of running with half-parsed options.
GetOptions (
	"help|h"		=> \$OptHelp,		# HELP
	"bank|b:s"		=> \$bankName,		# bank name to get the right url
	"format|f:s"	=> \$format,		# output format
	"outTab:s"		=> \$outTab,		# tabular output
	"outJson:s"		=> \$outJson,		# Json output
	"verbose:i"		=> \$VERBOSE,		# level of script verbose [should be 1 or 3]
) or help() ;

## if you put the option -help or -h, the help function is started (it exits the script)
help() if ( defined $OptHelp ) ;

#=============================================================================
#                                MAIN SCRIPT
#=============================================================================


# Get the configuration: every *.cfg file located next to the script is parsed.
# $CONF is reassigned on each iteration, so when several .cfg files are present
# only the last one returned by the glob is kept.
#
# bsd_glob is used instead of the <...> glob operator because the latter splits
# its pattern on whitespace: a script installed in a path containing a space
# would silently find no configuration file at all.
require File::Glob ;
my ( $CONF ) = ( undef ) ;
foreach my $conf ( File::Glob::bsd_glob( $binPath.'/*.cfg' ) ) {
	my $oConf = conf::new() ;
	$CONF = $oConf->as_conf($conf) ;
}

# MAIN SCRIPT :

# Main workflow, run only when a bank name was given on the command line:
#   1. read the bank URL and archive format from the configuration and ask the
#      remote server for the current bank version ;
#   2. download and unpack the bank when no usable local copy exists ;
#   3. copy the cached tabular reference file to the user output or, when it
#      does not exist yet, build it from the downloaded bank file.
# Without a bank name the usage is displayed and the script stops.
if ( defined $bankName ) {

	# No configuration loaded: every lookup below would otherwise end in a
	# misleading "bank name doesn't match" error.
	if ( !defined $CONF ) {
		croak "no configuration (*.cfg file) can be loaded from $binPath -- database downloading stopped" ;
	}

	my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;

	print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;

	# get information from conf
	if ( $CONF->{$bankName.'_URL'} ) {
		$bankUrl = $CONF->{$bankName.'_URL'} ;
		# get version of the http resource
		my $oUtils = utils->new() ;
		($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;

		# the version may be undefined here; that case is reported (croak) further down
		print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
	}
	else {
		croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
	}

	# archive format of the remote resource, as declared in the conf
	if ( $CONF->{$bankName.'_HTTP_FORMAT'} ) {
		$bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ;
	}
	else {
		croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
	}

	print "** Manage bank environment\n" if $VERBOSE > 1 ;
	my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
	if ( !-d $bankDir ) {
		print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
		# fail here with the system error instead of later on the sub-directory creation
		unless ( mkdir $bankDir ) {
			my $mkdirError = $! ;
			croak "the local bank dir ($bankDir) cannot be created: $mkdirError" unless ( -d $bankDir ) ;
		}
	}

	print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;

	# $bankUrl is always set at this point, only the version detection can have failed
	if ( ( !defined $bankUrl ) or ( !defined $bankVersion ) ) {
		croak "the version of the given bank url ($bankUrl) cannot be detected -- database download stopped" ;
	}

	# Init the bank file names: everything lives in <bank dir>/<bank>_<version>/
	my $bankTag = $bankName.'_'.$bankVersion ;
	my $bankFileDir = $bankDir.'/'.$bankTag ;
	my $bankFilePath = $bankFileDir.'/'.$bankTag.'.'.lc($bankSuffixe) ;	# downloaded archive
	# For final files (tabular/json)
	my $bankOutputTabularFile = $bankFileDir.'/'.$bankTag.'.tabular' ;
	my $bankOutputJsonFile = $bankFileDir.'/'.$bankTag.'.json' ;	# reserved for the json output (not implemented yet)
	# unpacked bank file
	my $bankFile = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;

	print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
	print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;

	# The bank is considered as locally available only if its directory holds
	# something usable (the tabular reference file or the unpacked bank file).
	# Testing the directory alone would forbid any new download after an
	# interrupted run that left an empty directory behind.
	my $bankIsCached = 0 ;
	if ( ( -d $bankFileDir ) and ( ( -e $bankOutputTabularFile ) or ( -e $bankFile ) ) ) {
		$bankIsCached = 1 ;
	}

	# download the bank if it does not exist !
	if ( !$bankIsCached ) {

		print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
		if ( !-d $bankFileDir ) {
			print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
			mkdir $bankFileDir ;
		}

		if ( -d $bankFileDir ) {
			print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
			print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
			my $oDownloader = utils->new() ;
			$oDownloader->getHttpFile($bankUrl, $bankFilePath) ;

			print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;

			my $oUnzip = utils->new() ;
			# if archive is a zip
			if ( $bankSuffixe eq 'ZIP' ) {
				$oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
				$oUnzip->cleanUnzip($bankFilePath, $bankFile) ;
			}
			# elsif archive is a gz
			elsif ( $bankSuffixe eq 'GZ' ) {
				$oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
				$oUnzip->cleanUnzip($bankFilePath, $bankFile) ;
			}
			# NOTE(review): any other archive format is left untouched on disk -- confirm this is intended
		}
		else {
			croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
		}
	}
	else {
		print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
		print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ;
	}

	## the final tabular file already exists - only a copy for the user is needed
	if ( -e $bankOutputTabularFile ) {
		# -outTab is optional (and may be given without value): copy only when a target exists
		if ( ( defined $outTab ) and ( $outTab ne '' ) ) {
			# copy the ref file into the user history/session
			my $ocsv = csv->new( ) ;
			my $csv = $ocsv->get_csv_object("\t") ;
			my ($refEntries) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
			$ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
			print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
		}
		else {
			print "\t/!\\ no tabular output asked (-outTab): the ref file $bankOutputTabularFile is not copied /!\\\n" if $VERBOSE > 1 ;
		}
	}
	## the final tabular file does not exist - needs to be created from the bank file
	else {

		## Build a HASH with all metabolites from the downloaded bank file
		my ($handler, $metabolites, $nbEntries) ;

		if ( -e $bankFile ) {
			my $oHandler = hmdb_api->new ;

			# in case the download file is in XML or Metabocard or SDF...
			if ( $CONF->{$bankName.'_DB_FORMAT'} eq 'XML' ) {
				($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
				print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
			}
			elsif ( $CONF->{$bankName.'_DB_FORMAT'} eq 'CARD' ) {
				($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
				($metabolites) = $oHandler->cowmetdb_hash($handler) ;
				# NOTE(review): the count is dereferenced here but not in the XML branch -- confirm against hmdb_api
				print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
			}
			else {
				#TODO...
			}
		}
		else {
			print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
		}

		## Write outputs !
		print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;

		# Two distinct failures, reported separately: nothing parsed / no format asked
		if ( !defined $metabolites ) {
			croak "No metabolites are extracted from the $bankName bank file\n" ;
		}
		if ( !defined $format ) {
			croak "No output format is defined (-format [tabular|json]) -- no output written for the $bankName bank\n" ;
		}

		## Generation of M+H and M-H masses
		my $ometmz = hmdb_api->new ;
		my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($CONF->{$bankName.'_DB_FORMAT'}, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;

		## tabular output
		if ( ( $format eq 'tabular' ) and ( defined $outTab ) ) {
			print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
			# sort metabolites
			my $omet = hmdb_api->new ;
			my $sortedMetabolites = undef ;

			if ( $CONF->{$bankName.'_DB_FORMAT'} eq 'XML' ) {
				$sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
			}
			elsif ( $CONF->{$bankName.'_DB_FORMAT'} eq 'CARD' ) {
				$sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
			}

			my $ocsv = csv->new( ) ;
			my $csv = $ocsv->get_csv_object("\t") ;
			# create the ref file for ./bank repo
			$ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
			# generate also a copy for user history
			$ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
		}
		elsif ( defined $outJson ) {
			#TODO...
		}

		## Clean local envt
		unlink $bankFile if ( -e $bankFile ) ;
	}
} ## END IF defined $bankName
else {
	# help() prints the usage and exits: the croak below is only a safety net
	help() ;
	croak "No bank name and format are defined - Please set one" ;
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
### END of main script





#====================================================================================
# Help subroutine: displays the usage of the script on STDERR
# number of arguments : 0
# Argument(s)        : none
# Return           : never returns - the script exits with status 1
#====================================================================================
sub help {
	# usage text, printed as it is
	my $usage = "
downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni\@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :		 
		downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]
		
		" ;

	print STDERR $usage ;
	exit(1) ;
}

## END of script - F Giacomoni 

__END__

=head1 NAME

 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.

=head1 USAGE

 downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]
 or downloader_bank_hmdb.pl -h

=head1 SYNOPSIS

This script exports a specific tissue/matrix bank from the HMDB source.

=head1 DESCRIPTION

This main program is a Galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format.

=over 4

=item B<function01>

=item B<function02>

=back

=head1 AUTHOR

Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>

=head1 LICENSE

This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.

=head1 VERSION

version 1 : 21 / 11 / 2018

version 2 : ??

=cut