Mercurial > repos > fgiacomoni > downloader_bank_hmdb
annotate downloader_bank_hmdb.pl @ 2:be504ccbc41c draft default tip
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
author | fgiacomoni |
---|---|
date | Wed, 30 Nov 2022 16:14:27 +0000 |
parents | 7c9269bded0e |
children |
rev | line source |
---|---|
0 | 1 #!perl |
2 | |
3 ## script : downloader_bank_hmdb.pl | |
4 #============================================================================= | |
5 # Included modules and versions | |
6 #============================================================================= | |
7 ## Perl modules | |
8 use strict ; | |
9 use warnings ; | |
10 use Carp qw (cluck croak carp) ; | |
11 | |
12 use Data::Dumper ; | |
13 use Getopt::Long ; | |
14 use File::Basename ; | |
15 use FindBin ; ## Allows you to locate the directory of original perl script | |
16 | |
17 ## Specific Perl Modules (PFEM) | |
18 use lib $FindBin::Bin.'/lib' ; | |
19 my $binPath = $FindBin::Bin ; | |
20 | |
21 ## Dedicated Perl modules (home-made) | |
22 use hmdb_api qw( :ALL ) ; | |
23 use utils qw( :ALL ) ; | |
24 use conf qw( :ALL ) ; | |
25 use csv qw( :ALL ) ; | |
26 | |
27 ## Initialized values: option holders filled by GetOptions below; the verbosity level defaults to 3 | |
28 my $OptHelp ; | |
29 my $VERBOSE = 3 ; | |
30 my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ; | |
31 | |
32 #============================================================================= | |
33 # Manage command-line options | |
34 #============================================================================= | |
35 &GetOptions ( "h" => \$OptHelp, # HELP | |
36 "bank|b:s" => \$bankName, # bank name, used as prefix of the conf keys (e.g. <bank>_URL) to get the right url | |
37 "format|f:s"=> \$format, # output format (only 'tabular' is handled by the main script; the Json branch is a TODO) | |
38 "outTab:s" => \$outTab, # tabular output file name | |
39 "outJson:s" => \$outJson, # Json output (only tested in a TODO branch of the main script) | |
40 "verbose:i" => \$VERBOSE, # level of script verbosity [should be 1 or 3]; most messages are printed when > 1 | |
41 | |
42 | |
43 ) ; | |
44 | |
45 ## if the -h option is given, the help subroutine is called. NOTE(review): only "h" is registered above, so -help is presumably not accepted - confirm | |
46 if ( defined($OptHelp) ){ &help ; } | |
47 | |
48 #============================================================================= | |
49 # MAIN SCRIPT | |
50 #============================================================================= | |
51 | |
52 | |
53 # get conf information: every *.cfg file found next to the script is parsed; $CONF keeps the result of the last one | |
54 my ( $CONF ) = ( undef ) ; | |
55 foreach my $conf ( <$binPath/*.cfg> ) { | |
56 my $oConf = conf::new() ; | |
57 $CONF = $oConf->as_conf($conf) ; | |
58 } | |
59 | |
60 # MAIN SCRIPT : | |
61 | |
62 if ( (defined $bankName) ) { | |
63 | |
64 my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ; | |
65 my ( $bankOutputTabularFile, $bankOutputJsonFile ) = (undef, undef) ; | |
66 | |
67 print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ; | |
68 | |
69 # get the bank url from conf (key: <bankName>_URL); croak if the key is missing or false | |
70 if ($CONF->{$bankName.'_URL'}) { | |
71 $bankUrl = $CONF->{$bankName.'_URL'} ; | |
72 # check the url, then get the version of the http resource (both through the utils module) | |
73 my $oUtils = utils->new() ; | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
74 $oUtils->checkHttpUrl($bankUrl) ; |
0 | 75 ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ; |
76 | |
77 print "\tThe current version is: $bankVersion\n" if $VERBOSE > 1 ; | |
78 } | |
79 else { | |
80 croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ; | |
81 } | |
82 | |
83 # decide whether the bank must be downloaded (i.e. whether the last version is already on disk) | |
84 my ($bankFileExist, $bankFilePath, $bankFileDir) = (undef, undef, undef) ; | |
85 | |
86 if ($CONF->{$bankName.'_HTTP_FORMAT'}) { | |
87 $bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ; | |
88 } | |
89 else { | |
90 croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ; | |
91 } | |
92 | |
93 print "** Manage bank environment\n" if $VERBOSE > 1 ; | |
94 my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ; | |
95 if ( !-d $bankDir ) { | |
96 print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ; | |
97 mkdir $bankDir ; | |
98 } | |
99 | |
100 | |
101 print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ; | |
102 | |
103 if ( ( defined $bankUrl ) and ( defined $bankVersion ) ) { | |
104 if(-d $binPath.'/'.$CONF->{'LOCALBANK_PATH'}.'/'.$bankName.'_'.$bankVersion) { $bankFileExist = 'TRUE' ; } | |
105 else { $bankFileExist = 'FALSE' ; } | |
106 # Init the bank dir and the path of the downloaded archive (suffix: lower-cased <bankName>_HTTP_FORMAT value) | |
107 $bankFileDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'}.'/'.$bankName.'_'.$bankVersion ; | |
108 $bankFilePath = $bankFileDir.'/'.$bankName.'_'.$bankVersion.'.'.lc($bankSuffixe) ; | |
109 # Paths of the final files (tabular/json), located in the same versioned bank dir | |
110 $bankOutputTabularFile = $binPath.'/'.$CONF->{'LOCALBANK_PATH'}.'/'.$bankName.'_'.$bankVersion.'/'.$bankName.'_'.$bankVersion.'.tabular' ; | |
111 $bankOutputJsonFile = $binPath.'/'.$CONF->{'LOCALBANK_PATH'}.'/'.$bankName.'_'.$bankVersion.'/'.$bankName.'_'.$bankVersion.'.json' ; | |
112 | |
113 print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ; | |
114 print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ; | |
115 | |
116 } | |
117 else { | |
118 croak "the given bank url ($bankUrl) and the detected version ar undef -- database download stopped" ; | |
119 } | |
120 | |
121 my $bankFile = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ; | |
122 | |
123 # download the bank if it does not exist yet. NOTE(review): on HTTP failure the cleanup below calls unlink on the bank dir, but Perl's unlink does not remove directories - confirm the intent (rmdir ?) | |
124 if ( $bankFileExist eq 'FALSE' ) { | |
125 | |
126 print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ; | |
127 if (!-d $bankFileDir) { | |
128 print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ; | |
129 mkdir $bankFileDir ; | |
130 } | |
131 | |
132 if (-d $bankFileDir) { | |
133 print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ; | |
134 print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ; | |
135 my $oDownloader = utils->new() ; | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
136 my ($httpStatus) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
137 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
138 if ($httpStatus ne '200 OK' ) { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
139 unlink $bankFileDir ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
140 croak "\t/!\\ Download of $bankUrl failed with HTTP error: $httpStatus\n" ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
141 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
142 else { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
143 print "\tDownloading process ended with HTTP: $httpStatus\n" ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
144 } |
0 | 145 |
146 print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ; | |
147 | |
148 my $oUnzip = utils->new() ; | |
149 # if archive is a zip | |
150 if ($bankSuffixe eq 'ZIP') { | |
151 $oUnzip->unzipFile($bankFilePath, $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'}, $CONF->{$bankName.'_FILE_NAME'}) ; | |
152 $oUnzip->cleanUnzip($bankFilePath, $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ) ; | |
153 } | |
154 # elsif archive is a gz (any other suffix is not unpacked: there is no fallback branch) | |
155 elsif ($bankSuffixe eq 'GZ') { | |
156 $oUnzip->gunzipFile($bankFilePath, $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'}, $CONF->{$bankName.'_FILE_NAME'}) ; | |
157 $oUnzip->cleanUnzip($bankFilePath, $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ) ; | |
158 } | |
159 } | |
160 else { | |
161 croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ; | |
162 } | |
163 } | |
164 else { | |
165 print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ; | |
166 print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ; | |
167 } | |
168 | |
169 ## If the reference tabular file already exists it is reused; otherwise it is built from the downloaded bank file | |
170 if (-e $bankOutputTabularFile) { | |
171 # copy the ref file into the user history/session (parsed then re-written to $outTab through the csv module). NOTE(review): $outTab is not checked for definedness here - confirm | |
172 my $ocsv = csv->new( ) ; | |
173 my $csv = $ocsv->get_csv_object("\t") ; | |
174 my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ; | |
175 $ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ; | |
176 print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ; | |
177 } | |
178 ## the final tabular file does not exist - it needs to be created from the downloaded bank file (XML or CARD) | |
179 else { | |
180 | |
181 ## Build a HASH with all metabolites from downloaded xml | |
182 my ($handler, $metabolites, $nbEntries) = (undef, undef) ; | |
183 | |
184 if (-e $bankFile) { | |
185 my $oHandler = hmdb_api->new ; | |
186 | |
187 # the parser depends on the <bankName>_DB_FORMAT conf value: XML and CARD are handled, any other format is still a TODO | |
188 if ($CONF->{$bankName.'_DB_FORMAT'} eq 'XML') { | |
189 ($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ; | |
190 print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ; | |
191 } | |
192 elsif ( ($CONF->{$bankName.'_DB_FORMAT'} eq 'CARD') ) { | |
193 ($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ; | |
194 ($metabolites) = $oHandler->cowmetdb_hash($handler) ; | |
195 print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ; | |
196 } | |
197 else { | |
198 #TODO... | |
199 } | |
200 } | |
201 else { | |
202 print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ; | |
203 } | |
204 | |
205 ## Write outputs ! | |
206 print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ; | |
207 | |
208 if ( (defined $metabolites) and (defined $format) ) { | |
209 | |
210 ## Generation of M+H and M-H masses | |
211 my $ometmz = hmdb_api->new ; | |
212 my $completedMetabolites = undef ; | |
213 $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($CONF->{$bankName.'_DB_FORMAT'}, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ; | |
214 | |
215 # print Dumper $completedMetabolites ; | |
216 | |
217 ## tabular output | |
218 if ( ( $format eq 'tabular') and (defined $outTab) ) { | |
219 print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ; | |
220 # print Dumper $metabolites ; | |
221 # sort metabolites | |
222 my $omet = hmdb_api->new ; | |
223 my $sortedMetabolites = undef ; | |
224 | |
225 if ($CONF->{$bankName.'_DB_FORMAT'} eq 'XML') { | |
226 $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ; | |
227 } | |
228 elsif ( ($CONF->{$bankName.'_DB_FORMAT'} eq 'CARD') ) { | |
229 $sortedMetabolites= $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ; | |
230 } | |
231 | |
232 # print Dumper $sortedMetabolites ; | |
233 my $ocsv = csv->new( ) ; | |
234 my $csv = $ocsv->get_csv_object("\t") ; | |
235 # create the ref file for ./bank repo | |
236 $ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ; | |
237 # generate also a copy for user history | |
238 $ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ; | |
239 | |
240 } | |
241 elsif ( (defined $format) and (defined $outJson) ) { | |
242 #TODO... | |
243 } | |
244 } | |
245 else { | |
246 croak "No metabolites are extracted from the $bankName bank file\n" ; | |
247 } | |
248 ## Clean local environment: delete the unpacked bank file if it exists | |
249 unlink $bankFile if (-e $bankFile) ; | |
250 } | |
251 } ## END IF defined $bankName | |
252 else { | |
253 &help ; | |
254 croak "No bank name and format are defined - Please set one" ; | |
255 } | |
256 | |
257 print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ; | |
258 ### END of main script | |
259 | |
260 | |
261 | |
262 | |
263 | |
264 #==================================================================================== | |
265 # Help subroutine called with the -h option, or when no bank name is given: prints the usage on STDERR | |
266 # number of arguments : 0 | |
267 # Argument(s) : none | |
268 # Return : never returns - the script exits with status 1 | |
269 #==================================================================================== | |
270 sub help { | |
271 print STDERR " | |
272 downloader_bank_hmdb.pl | |
273 | |
274 # downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source. | |
275 # Input : N/A | |
276 # Author : Franck Giacomoni | |
277 # Email : fgiacomoni\@inra.fr | |
278 # Version : 1.0 | |
279 # Created : 21/11/2018 | |
280 USAGE : | |
281 downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name] | |
282 | |
283 "; | |
284 exit(1); | |
285 } | |
286 | |
287 ## END of script - F Giacomoni | |
288 | |
289 __END__ | |
290 | |
291 =head1 NAME | |
292 | |
293 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source. | |
294 | |
295 =head1 USAGE | |
296 | |
297 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json] | |
298 or downloader_bank_hmdb.pl -h | |
299 | |
300 =head1 SYNOPSIS | |
301 | |
302 This script exports a specific tissue/matrix bank from the HMDB source. | |
303 | |
304 =head1 DESCRIPTION | |
305 | |
306 This main program is a Galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format. | |
307 | |
308 =over 4 | |
309 | |
310 =item B<function01> | |
311 | |
312 =item B<function02> | |
313 | |
314 =back | |
315 | |
316 =head1 AUTHOR | |
317 | |
318 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt> | |
319 | |
320 =head1 LICENSE | |
321 | |
322 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
323 | |
324 =head1 VERSION | |
325 | |
326 version 1 : 21 / 11 / 2018 | |
327 | |
328 version 2 : ?? | |
329 | |
330 =cut |