0
|
1 #!perl
|
|
2
|
|
3 ## script : downloader_bank_hmdb.pl
|
|
4 #=============================================================================
|
|
5 # Included modules and versions
|
|
6 #=============================================================================
|
|
7 ## Perl modules
|
|
8 use strict ;
|
|
9 use warnings ;
|
|
10 use Carp qw (cluck croak carp) ;
|
|
11
|
|
12 use Data::Dumper ;
|
|
13 use Getopt::Long ;
|
|
14 use File::Basename ;
|
|
15 use FindBin ; ## Allows you to locate the directory of original perl script
|
|
16
|
|
17 ## Specific Perl Modules (PFEM)
|
|
18 use lib $FindBin::Bin.'/lib' ;
|
|
19 my $binPath = $FindBin::Bin ;
|
|
20
|
|
21 ## Dedicate Perl Modules (Home made...)
|
|
22 use hmdb_api qw( :ALL ) ;
|
|
23 use utils qw( :ALL ) ;
|
|
24 use conf qw( :ALL ) ;
|
|
25 use csv qw( :ALL ) ;
|
|
26
|
|
## Initialized values
my $OptHelp ;        # defined only when -h / -help is given on the command line
my $VERBOSE = 3 ;    # progress messages are printed when this value is > 1
my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## Command line parsing.
## The help option is declared with both names ("help|h") so that the
## documented "-help" form is accepted as well as "-h".
## The return value of GetOptions is not checked, so a parsing error does not
## stop the script here.
GetOptions (
    "help|h"     => \$OptHelp,     # HELP
    "bank|b:s"   => \$bankName,    # bank name to get the right url
    "format|f:s" => \$format,      # output format
    "outTab:s"   => \$outTab,      # tabular output
    "outJson:s"  => \$outJson,     # Json output
    "verbose:i"  => \$VERBOSE,     # level of script verbose [should be 1 or 3]
) ;

## if you put the option -help or -h, the help function is started (it exits)
if ( defined($OptHelp) ) { help() ; }
|
|
47
|
|
48 #=============================================================================
|
|
49 # MAIN SCRIPT
|
|
50 #=============================================================================
|
|
51
|
|
52
|
|
# Load the configuration: every *.cfg file found in the script directory is
# parsed in turn, each result replacing the previous one, so $CONF ends up
# holding the result of the last file parsed (or stays undef when there is none).
my $CONF ;
for my $cfgFile ( glob("$binPath/*.cfg") ) {
    $CONF = conf::new()->as_conf($cfgFile) ;
}
|
|
59
|
|
# MAIN SCRIPT :
# When a bank name is given:
#   1. read the bank url / archive format from the conf and ask for the remote version,
#   2. download and uncompress the bank when its versioned directory is missing,
#   3. copy the already built reference tabular file to $outTab, or build it
#      (and the $outTab copy) from the downloaded bank file.
# Without a bank name, the reason is reported and the help is displayed.
if ( defined $bankName ) {

    my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;
    my ( $bankOutputTabularFile, $bankOutputJsonFile ) = (undef, undef) ;

    print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;

    # get information from conf
    if ($CONF->{$bankName.'_URL'}) {
        $bankUrl = $CONF->{$bankName.'_URL'} ;
        # get version of the http resource
        my $oUtils = utils->new() ;
        ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;

        # an undefined version is reported further down (croak), not printed here
        print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
    }
    else {
        croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
    }

    # manage if needed to download or not the bank (get or not the last version)
    my ($bankFileExist, $bankFilePath, $bankFileDir) = (undef, undef, undef) ;

    if ($CONF->{$bankName.'_HTTP_FORMAT'}) {
        $bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ;
    }
    else {
        croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
    }

    print "** Manage bank environment\n" if $VERBOSE > 1 ;
    my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
    if ( !-d $bankDir ) {
        print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
        # fail here with the system error instead of failing later on the sub directory
        mkdir $bankDir or croak "[ERROR] the local bank dir ($bankDir) cannot be created: $! -- database download stopped" ;
    }

    print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;

    if ( ( defined $bankUrl ) and ( defined $bankVersion ) ) {
        # every path of a given bank version lives in <bankDir>/<bank>_<version>/
        my $bankBaseName = $bankName.'_'.$bankVersion ;
        $bankFileDir = $bankDir.'/'.$bankBaseName ;

        # the bank is considered as present when its versioned directory exists
        $bankFileExist = ( -d $bankFileDir ) ? 'TRUE' : 'FALSE' ;

        # Init the bank file name (downloaded archive)
        $bankFilePath = $bankFileDir.'/'.$bankBaseName.'.'.lc($bankSuffixe) ;
        # For final files (tabular/json)
        $bankOutputTabularFile = $bankFileDir.'/'.$bankBaseName.'.tabular' ;
        $bankOutputJsonFile    = $bankFileDir.'/'.$bankBaseName.'.json' ;

        print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
        print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;
    }
    else {
        # $bankUrl is always defined at this point, only the version can be missing
        croak "the version of the given bank url ($bankUrl) cannot be detected -- database download stopped" ;
    }

    # uncompressed bank file, as named in the conf
    my $bankFile = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;

    # download the bank if does not exist !
    if ( $bankFileExist eq 'FALSE' ) {

        print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
        if (!-d $bankFileDir) {
            print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
            mkdir $bankFileDir ;    # the result is checked just below with -d
        }

        if (-d $bankFileDir) {
            print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
            print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
            my $oDownloader = utils->new() ;
            my ($fileZip) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ;

            print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;

            my $oUnzip = utils->new() ;
            # if archive is a zip
            if ($bankSuffixe eq 'ZIP') {
                $oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile) ;
            }
            # elsif archive is a gz
            elsif ($bankSuffixe eq 'GZ') {
                $oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile) ;
            }
            # any other suffixe: the downloaded file is left as it is
        }
        else {
            croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
        }
    }
    else {
        print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
        print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ;
    }

    ## the final tabular file already exists - only a copy is needed
    if (-e $bankOutputTabularFile) {
        # same rule as in the building branch below: nothing is written
        # when no -outTab file is given
        if ( defined $outTab ) {
            # copy the ref file into the user history/session
            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
            $ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
            print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
        }
        else {
            print "\t/!\\ the ref file $bankOutputTabularFile exists but no -outTab file is given: nothing to copy /!\\\n" if $VERBOSE > 1 ;
        }
    }
    ## the final tabular file does not exists - need to be created from xml
    else {

        ## Build a HASH with all metabolites from downloaded xml
        my ($handler, $metabolites, $nbEntries) = (undef, undef, undef) ;
        my $dbFormat = $CONF->{$bankName.'_DB_FORMAT'} ;

        if (-e $bankFile) {
            my $oHandler = hmdb_api->new ;

            # in case the download file is in XML or Metabocard or SDF...
            if ($dbFormat eq 'XML') {
                ($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
                print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
            }
            elsif ($dbFormat eq 'CARD') {
                ($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
                ($metabolites) = $oHandler->cowmetdb_hash($handler) ;
                # NOTE(review): $nbEntries is dereferenced here, so cowmetdb_handle
                # presumably returns a scalar reference -- to be confirmed in hmdb_api
                print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
            }
            else {
                #TODO...
            }
        }
        else {
            print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
        }

        ## Write outputs !
        print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;

        # both cases stop the script, each one with its own reason
        if ( !defined $metabolites ) {
            croak "No metabolites are extracted from the $bankName bank file\n" ;
        }
        if ( !defined $format ) {
            croak "No output format is defined for the $bankName bank (option -format) - no output written\n" ;
        }

        ## Generation of M+H and M-H masses
        my $ometmz = hmdb_api->new ;
        my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($dbFormat, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;

        ## tabular output
        if ( ( $format eq 'tabular') and (defined $outTab) ) {
            print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
            # sort metabolites
            my $omet = hmdb_api->new ;
            my $sortedMetabolites = undef ;

            if ($dbFormat eq 'XML') {
                $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
            }
            elsif ($dbFormat eq 'CARD') {
                $sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
            }

            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            # create the ref file for ./bank repo
            $ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
            # generate also a copy for user history
            $ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
        }
        elsif ( defined $outJson ) {
            #TODO...
        }

        ## Clean local envt (the uncompressed bank file is removed)
        unlink $bankFile if (-e $bankFile) ;
    }
} ## END IF defined $bankName
else {
    # help() exits the script, so the reason has to be reported before calling it
    carp "No bank name and format are defined - Please set one" ;
    help() ;
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
### END of main script
|
|
250
|
|
251
|
|
252
|
|
253
|
|
254
|
|
#====================================================================================
# Help subroutine called with -h option
# 	number of arguments : 0
# 	Argument(s)        : none
# 	Return             : nothing - the usage text is printed on STDERR and the
# 	                     script is stopped with the exit status 1
#====================================================================================
sub help {
    # non-interpolating heredoc: the text is printed exactly as written below
    my $usage = <<'END_USAGE' ;

downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :
downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]

END_USAGE

    print STDERR $usage ;
    exit(1) ;
}
|
|
277
|
|
278 ## END of script - F Giacomoni
|
|
279
|
|
280 __END__
|
|
281
|
|
282 =head1 NAME
|
|
283
|
|
284 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source.
|
|
285
|
|
286 =head1 USAGE
|
|
287
|
|
288 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json]
|
|
289 or downloader_bank_hmdb.pl -help
|
|
290
|
|
291 =head1 SYNOPSIS
|
|
292
|
|
This script exports a specific tissue/matrix bank from the HMDB source.
|
|
294
|
|
295 =head1 DESCRIPTION
|
|
296
|
|
This main program is a Galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format.
|
|
298
|
|
299 =over 4
|
|
300
|
|
301 =item B<function01>
|
|
302
|
|
303 =item B<function02>
|
|
304
|
|
305 =back
|
|
306
|
|
307 =head1 AUTHOR
|
|
308
|
|
309 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt>
|
|
310
|
|
311 =head1 LICENSE
|
|
312
|
|
313 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
|
|
314
|
|
315 =head1 VERSION
|
|
316
|
|
317 version 1 : 21 / 11 / 2018
|
|
318
|
|
319 version 2 : ??
|
|
320
|
|
321 =cut |