comparison downloader_bank_hmdb.pl @ 0:7c9269bded0e draft

Init repository for [downloader_bank_hmdb]
author fgiacomoni
date Tue, 14 Jan 2020 05:21:23 -0500
parents
children be504ccbc41c
comparison
equal deleted inserted replaced
-1:000000000000 0:7c9269bded0e
1 #!perl
2
3 ## script : downloader_bank_hmdb.pl
4 #=============================================================================
5 # Included modules and versions
6 #=============================================================================
7 ## Perl modules
8 use strict ;
9 use warnings ;
10 use Carp qw (cluck croak carp) ;
11
12 use Data::Dumper ;
13 use Getopt::Long ;
14 use File::Basename ;
15 use FindBin ; ## Allows you to locate the directory of original perl script
16
17 ## Specific Perl Modules (PFEM)
18 use lib $FindBin::Bin.'/lib' ;
19 my $binPath = $FindBin::Bin ;
20
21 ## Dedicate Perl Modules (Home made...)
22 use hmdb_api qw( :ALL ) ;
23 use utils qw( :ALL ) ;
24 use conf qw( :ALL ) ;
25 use csv qw( :ALL ) ;
26
## Initialized values
## $OptHelp stays undef unless -h/-help is given; $VERBOSE defaults to 3 and
## every message of the script is printed when $VERBOSE > 1.
my $OptHelp ;
my $VERBOSE = 3 ;
my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## Command line parsing. The ":s" / ":i" specifiers make the option values
## optional for Getopt::Long; the script checks later which ones are defined.
## The help switch is registered under both names because the usage text
## advertises -help while only -h used to be declared (so -help was rejected
## as an unknown option).
GetOptions (
    "help|h"     => \$OptHelp,   # HELP
    "bank|b:s"   => \$bankName,  # bank name to get the right url
    "format|f:s" => \$format,    # output format
    "outTab:s"   => \$outTab,    # tabular output
    "outJson:s"  => \$outJson,   # Json output
    "verbose:i"  => \$VERBOSE,   # level of script verbose [should be 1 or 3]
) ;

## if you put the option -help or -h function help is started
if ( defined($OptHelp) ) { help() ; }
47
48 #=============================================================================
49 # MAIN SCRIPT
50 #=============================================================================
51
52
# Load the configuration: every *.cfg file located next to the script is
# parsed in turn with conf::as_conf and its result is stored in $CONF, so the
# value kept at the end is the one from the last file returned by glob.
# $CONF stays undef when no *.cfg file is found.
my $CONF ;
for my $cfgFile ( glob("$binPath/*.cfg") ) {
    $CONF = conf::new()->as_conf($cfgFile) ;
}
59
60 # MAIN SCRIPT :
61
## Main workflow, run only when a bank name is given:
##   1. read the bank url / archive format from the configuration and ask the
##      remote server for the current version of the resource,
##   2. prepare the local bank directories,
##   3. download and uncompress the bank when it is not usable locally,
##   4. either copy the existing reference tabular file to the user output or
##      build it from the downloaded bank file.
## Without a bank name, the help is displayed.
if ( defined $bankName ) {

    # Without any *.cfg file next to the script $CONF is still undef: stop here
    # with an explicit message instead of reporting an unknown bank name.
    if ( !defined $CONF ) {
        croak "[ERROR] no configuration (*.cfg) file found in $binPath -- database download stopped" ;
    }

    my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;
    my ( $bankOutputTabularFile, $bankOutputJsonFile ) = (undef, undef) ;

    print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;

    # get information from conf
    if ($CONF->{$bankName.'_URL'}) {
        $bankUrl = $CONF->{$bankName.'_URL'} ;
        # get version of the http resource
        my $oUtils = utils->new() ;
        ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;

        # an undefined version is reported by the dedicated croak below
        print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
    }
    else {
        croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
    }

    # archive format of the remote resource (compared to 'ZIP' / 'GZ' below)
    if ($CONF->{$bankName.'_HTTP_FORMAT'}) {
        $bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ;
    }
    else {
        croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
    }

    print "** Manage bank environment\n" if $VERBOSE > 1 ;
    my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
    if ( !-d $bankDir ) {
        print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
        # the second -d test accepts a directory created meanwhile by another process
        mkdir($bankDir) or -d $bankDir
            or croak "[ERROR] the local bank dir ($bankDir) cannot be created: $! -- database download stopped" ;
    }

    print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;

    # $bankUrl is always defined at this point (the script croaks above
    # otherwise), so only the detected version can be missing.
    if ( !defined $bankVersion ) {
        croak "the version of the given bank url ($bankUrl) cannot be detected -- database download stopped" ;
    }

    # Init the bank file names: everything lives in <bank dir>/<name>_<version>/
    my $bankId       = $bankName.'_'.$bankVersion ;
    my $bankFileDir  = $bankDir.'/'.$bankId ;
    my $bankFilePath = $bankFileDir.'/'.$bankId.'.'.lc($bankSuffixe) ;       # downloaded archive
    my $bankFile     = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;    # uncompressed bank file
    # For final files (tabular/json)
    $bankOutputTabularFile = $bankFileDir.'/'.$bankId.'.tabular' ;
    $bankOutputJsonFile    = $bankFileDir.'/'.$bankId.'.json' ;              # not written yet (json output is a TODO)

    print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
    print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;

    # The bank is usable locally only if its directory holds either the
    # reference tabular file or the uncompressed bank file. Testing the
    # directory alone made every run fail after an interrupted download.
    my $bankFileExist = 'FALSE' ;
    if ( ( -d $bankFileDir ) and ( ( -e $bankOutputTabularFile ) or ( -e $bankFile ) ) ) {
        $bankFileExist = 'TRUE' ;
    }

    # dowload the bank if does not exist !
    if ( $bankFileExist eq 'FALSE' ) {

        print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
        if (!-d $bankFileDir) {
            print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
            mkdir $bankFileDir ;   # result checked by the -d test just below
        }

        if (-d $bankFileDir) {
            print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
            print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
            my $oDownloader = utils->new() ;
            # return value currently unused; the call is kept in list context
            my ($fileZip) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ;

            print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;

            my $oUnzip = utils->new() ;
            # if archive is a zip
            if ($bankSuffixe eq 'ZIP') {
                $oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
            }
            # elsif archive is a gz
            elsif ($bankSuffixe eq 'GZ') {
                $oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
            }
            # any other archive format is left as downloaded
        }
        else {
            croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
        }
    }
    else {
        print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
        print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ;
    }

    ## the reference tabular file already exists for this version
    if (-e $bankOutputTabularFile) {
        if ( defined $outTab ) {
            # copy the ref file into the user history/session
            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
            $ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
            print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
        }
        else {
            # nothing to copy to: say so instead of writing to an undefined path
            carp "the ref file $bankOutputTabularFile exists but no tabular output (-outTab) is defined -- nothing copied" ;
        }
    }
    ## the final tabular file does not exists - need to be created from xml
    else {

        ## Build a HASH with all metabolites from downloaded xml
        my ($handler, $metabolites, $nbEntries) = (undef, undef, undef) ;
        my $dbFormat = $CONF->{$bankName.'_DB_FORMAT'} ;

        if (-e $bankFile) {
            my $oHandler = hmdb_api->new ;

            # in case the download file is in XML or Metabocard or SDF...
            if ($dbFormat eq 'XML') {
                ($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
                print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
            }
            elsif ($dbFormat eq 'CARD') {
                ($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
                ($metabolites) = $oHandler->cowmetdb_hash($handler) ;
                # NOTE(review): $nbEntries is dereferenced here but used as a plain
                # scalar in the XML branch -- confirm that cowmetdb_handle returns
                # a scalar reference.
                print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
            }
            else {
                #TODO...
            }
        }
        else {
            print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
        }

        ## Write outputs !
        print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;

        # Both cases stop the job; they are reported separately so that a
        # missing -format is no longer announced as an extraction failure.
        if ( !defined $metabolites ) {
            croak "No metabolites are extracted from the $bankName bank file\n" ;
        }
        if ( !defined $format ) {
            croak "No output format (-format) is defined for the $bankName bank -- Please set one\n" ;
        }

        ## Generation of M+H and M-H masses
        my $ometmz = hmdb_api->new ;
        my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($dbFormat, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;

        ## tabular output
        if ( ( $format eq 'tabular') and (defined $outTab) ) {
            print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
            # sort metabolites
            my $omet = hmdb_api->new ;
            my $sortedMetabolites = undef ;

            if ($dbFormat eq 'XML') {
                $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
            }
            elsif ($dbFormat eq 'CARD') {
                $sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
            }

            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            # create the ref file for ./bank repo
            $ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
            # generate also a copy for user history
            $ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
        }
        elsif ( defined $outJson ) {
            #TODO...
        }

        ## Clean local envt: the uncompressed bank file is no longer needed
        unlink $bankFile if (-e $bankFile) ;
    }
} ## END IF defined $bankName
else {
    # help() prints the usage and exits, the croak is only a safety net
    help() ;
    croak "No bank name and format are defined - Please set one" ;
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
249 ### END of main script
250
251
252
253
254
#====================================================================================
# Help subroutine called with -h option (and when no bank name is given)
# number of arguments : 0
# Argument(s)        : none
# Return             : never returns - prints the usage on STDERR, then exits
#                      the program with status 1
#====================================================================================
sub help {
    my $usage = <<"END_OF_HELP" ;

downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni\@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :
downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]

END_OF_HELP

    print STDERR $usage ;
    exit 1 ;
}
277
278 ## END of script - F Giacomoni
279
280 __END__
281
282 =head1 NAME
283
284 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.
285
286 =head1 USAGE
287
288 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json]
289 or downloader_bank_hmdb.pl -help
290
291 =head1 SYNOPSIS
292
293 This script exports a specific tissue/matrix bank from the HMDB source.
294
295 =head1 DESCRIPTION
296
297 This main program is a Galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format.
298
299 =over 4
300
301 =item B<function01>
302
303 =item B<function02>
304
305 =back
306
307 =head1 AUTHOR
308
309 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>
310
311 =head1 LICENSE
312
313 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
314
315 =head1 VERSION
316
317 version 1 : 21 / 11 / 2018
318
319 version 2 : ??
320
321 =cut