Mercurial > repos > fgiacomoni > downloader_bank_hmdb
comparison downloader_bank_hmdb.pl @ 0:7c9269bded0e draft
Init repository for [downloader_bank_hmdb]
author | fgiacomoni |
---|---|
date | Tue, 14 Jan 2020 05:21:23 -0500 |
parents | |
children | be504ccbc41c |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7c9269bded0e |
---|---|
1 #!perl | |
2 | |
3 ## script : downloader_bank_hmdb.pl | |
4 #============================================================================= | |
5 # Included modules and versions | |
6 #============================================================================= | |
7 ## Perl modules | |
8 use strict ; | |
9 use warnings ; | |
10 use Carp qw (cluck croak carp) ; | |
11 | |
12 use Data::Dumper ; | |
13 use Getopt::Long ; | |
14 use File::Basename ; | |
15 use FindBin ; ## Allows you to locate the directory of original perl script | |
16 | |
17 ## Specific Perl Modules (PFEM) | |
18 use lib $FindBin::Bin.'/lib' ; | |
19 my $binPath = $FindBin::Bin ; | |
20 | |
21 ## Dedicate Perl Modules (Home made...) | |
22 use hmdb_api qw( :ALL ) ; | |
23 use utils qw( :ALL ) ; | |
24 use conf qw( :ALL ) ; | |
25 use csv qw( :ALL ) ; | |
26 | |
## Initialized values
my $OptHelp ;
my $VERBOSE = 3 ;
my ($bankName, $format, $outTab, $outJson) = (undef, undef, undef, undef) ;

#=============================================================================
#                                Manage EXCEPTIONS
#=============================================================================
## Command line parsing.
## "help|h" makes both -h and -help valid (only "h" was declared before, so -help was
## reported as an unknown option and then ignored).
## When GetOptions returns false (unknown option, bad value type...), the usage is displayed
## and the script stops instead of running with a partially parsed command line.
GetOptions (
    "help|h"     => \$OptHelp,    # HELP
    "bank|b:s"   => \$bankName,   # bank name to get the right url
    "format|f:s" => \$format,     # output format
    "outTab:s"   => \$outTab,     # tabular output
    "outJson:s"  => \$outJson,    # Json output
    "verbose:i"  => \$VERBOSE,    # level of script verbose [should be 1 or 3]
) or help() ;

## if you put the option -help or -h function help is started
if ( defined($OptHelp) ) { help() ; }
47 | |
48 #============================================================================= | |
49 # MAIN SCRIPT | |
50 #============================================================================= | |
51 | |
52 | |
# get conf informations
# Every *.cfg file located next to the script is parsed; when several files match,
# the result of the last one (in glob order) overwrites the previous ones.
my $CONF = undef ;
foreach my $confFile ( glob("$binPath/*.cfg") ) {
    my $oConf = conf::new() ;
    $CONF = $oConf->as_conf($confFile) ;
}
# Without any configuration every later lookup in $CONF is empty: say so explicitly,
# the script itself stops later on the first missing parameter.
carp "[WARNING] no usable configuration file (*.cfg) found in $binPath" if ( !defined $CONF ) ;
59 | |
# MAIN SCRIPT :
# Steps (only when a bank name is given, otherwise the help is displayed):
#   1. read the bank url and archive suffix from the configuration, get the version of the http resource
#   2. download and uncompress the bank when no usable local copy of this version is found
#   3. copy the cached tabular reference file to -outTab, or build it from the downloaded bank file

if ( defined $bankName ) {

    my ( $bankUrl, $bankVersion, $bankSuffixe ) = (undef, undef, undef) ;
    my ( $bankOutputTabularFile, $bankOutputJsonFile ) = (undef, undef) ;

    print "** Get version information from Wishart server databases (Canada)\n" if $VERBOSE > 1 ;

    # get information from conf
    if ( $CONF->{$bankName.'_URL'} ) {
        $bankUrl = $CONF->{$bankName.'_URL'} ;
        # get version of the http resource
        my $oUtils = utils->new() ;
        ($bankVersion) = $oUtils->getHttpFileVersion($bankUrl) ;

        # an undefined version is reported (croak) further down: do not interpolate it here
        print "\tThe current version is: $bankVersion\n" if ( ( $VERBOSE > 1 ) and ( defined $bankVersion ) ) ;
    }
    else {
        croak "the given bank name ($bankName) doesn't match with any configuration parameters -- database downloading stopped" ;
    }

    # manage if needed to download or not the bank (get or not the last version)
    my ($bankFileExist, $bankFilePath, $bankFileDir) = (undef, undef, undef) ;

    if ( $CONF->{$bankName.'_HTTP_FORMAT'} ) {
        $bankSuffixe = $CONF->{$bankName.'_HTTP_FORMAT'} ;
    }
    else {
        croak "[ERROR] the given bank suffixe doesn't exist! please check your configuration parameters -- database download stopped" ;
    }

    print "** Manage bank environment\n" if $VERBOSE > 1 ;
    my $bankDir = $binPath.'/'.$CONF->{'LOCALBANK_PATH'} ;
    if ( !-d $bankDir ) {
        print "\tdir creation of $bankDir\n" if $VERBOSE > 1 ;
        # the second -d test tolerates a directory created by someone else in the meantime
        if ( ( !mkdir $bankDir ) and ( !-d $bankDir ) ) {
            croak "[ERROR] the bank directory ($bankDir) cannot be created: $! -- database download stopped" ;
        }
    }

    print "** Manage the download of the last version of the bank\n" if $VERBOSE > 1 ;

    if ( ( !defined $bankUrl ) or ( !defined $bankVersion ) ) {
        my $shownUrl = ( defined $bankUrl ) ? $bankUrl : 'undef' ;
        croak "the given bank url ($shownUrl) or the detected version is undef -- database download stopped" ;
    }

    # Init the bank file names: everything lives in <bank dir>/<bank name>_<version>/
    my $bankTag = $bankName.'_'.$bankVersion ;
    $bankFileDir  = $bankDir.'/'.$bankTag ;
    $bankFilePath = $bankFileDir.'/'.$bankTag.'.'.lc($bankSuffixe) ;
    # For final files (tabular/json)
    $bankOutputTabularFile = $bankFileDir.'/'.$bankTag.'.tabular' ;
    $bankOutputJsonFile    = $bankFileDir.'/'.$bankTag.'.json' ;
    # uncompressed bank file
    my $bankFile = $bankFileDir.'/'.$CONF->{$bankName.'_FILE_NAME'} ;

    # The bank is considered present only if its version directory holds something usable
    # (the tabular reference file or the uncompressed bank file). Testing the directory
    # alone left an emptied or half-filled directory impossible to repair: nothing was
    # downloaded again and no metabolite could be extracted.
    if ( ( -d $bankFileDir ) and ( ( -e $bankOutputTabularFile ) or ( -e $bankFile ) ) ) {
        $bankFileExist = 'TRUE' ;
    }
    else {
        $bankFileExist = 'FALSE' ;
    }

    print "\tBuilding bank file dir: $bankFileDir\n" if $VERBOSE > 1 ;
    print "\tBuilding bank file name: $bankFilePath\n" if $VERBOSE > 1 ;

    # download the bank if does not exist !
    if ( $bankFileExist eq 'FALSE' ) {

        print "\t/!\\ The asked bank does not still exist /!\\\n" if $VERBOSE > 1 ;
        if ( !-d $bankFileDir ) {
            print "\tdir creation of $bankFileDir\n" if $VERBOSE > 1 ;
            mkdir $bankFileDir ; # failure is caught by the -d test just below
        }

        if ( -d $bankFileDir ) {
            print "\tDownload of the asked bank ($bankName)...\n" if $VERBOSE > 1 ;
            print "\tFrom...$bankUrl\n" if $VERBOSE > 1 ;
            my $oDownloader = utils->new() ;
            my ($fileZip) = $oDownloader->getHttpFile($bankUrl, $bankFilePath) ;

            print "\tUnzip the download archive ($bankFilePath) and clean env ...\n" if $VERBOSE > 1 ;

            my $oUnzip = utils->new() ;
            # if archive is a zip
            if ( $bankSuffixe eq 'ZIP' ) {
                $oUnzip->unzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
            }
            # elsif archive is a gz
            elsif ( $bankSuffixe eq 'GZ' ) {
                $oUnzip->gunzipFile($bankFilePath, $bankFile, $CONF->{$bankName.'_FILE_NAME'}) ;
                $oUnzip->cleanUnzip($bankFilePath, $bankFile ) ;
            }
            else {
                # no uncompress step exists for other suffixes: the downloaded file is left as is
                carp "[WARNING] the archive format ($bankSuffixe) is not managed (ZIP or GZ expected) - $bankFilePath is not uncompressed" ;
            }
        }
        else {
            croak "the given bank dir doesn't ($bankFileDir) exist or cannot be created - No download started\n" ;
        }
    }
    else {
        print "\t/!\\ $bankName Bank is already present on the disk... /!\\\n" if $VERBOSE > 1 ;
        print "\t/!\\ ...in $bankFile /!\\\n" if $VERBOSE > 1 ;
    }

    ## the final tabular file already exists - only a copy is needed
    if ( -e $bankOutputTabularFile ) {
        if ( defined $outTab ) {
            # copy the ref file into the user history/session
            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            my ($refEntries, $status) = $ocsv->parse_allcsv_object($csv, \$bankOutputTabularFile, 'y') ;
            $ocsv->write_csv_from_arrays($csv, $outTab, $refEntries) ;
            print "\t/!\\ copy the ref file $bankOutputTabularFile into your session ($outTab)... /!\\\n" if $VERBOSE > 1 ;
        }
        else {
            # no destination given: nothing to copy (an undefined file name was passed to the writer before)
            carp "[WARNING] no tabular output file is defined (-outTab) - the ref file $bankOutputTabularFile is not copied" ;
        }
    }
    ## the final tabular file does not exists - need to be created from xml
    else {

        ## Build a HASH with all metabolites from downloaded xml
        my ($handler, $metabolites, $nbEntries) = (undef, undef, undef) ;

        if ( -e $bankFile ) {
            my $oHandler = hmdb_api->new ;

            # in case the download file is in XML or Metabocard or SDF...
            if ( $CONF->{$bankName.'_DB_FORMAT'} eq 'XML' ) {
                ($metabolites, $nbEntries) = $oHandler->getMetaboliteFeatures($bankFile) ;
                print "\tExtraction of $nbEntries metabolites from $bankName XML file\n" if $VERBOSE > 1 ;
            }
            elsif ( $CONF->{$bankName.'_DB_FORMAT'} eq 'CARD' ) {
                ($handler, $nbEntries) = $oHandler->cowmetdb_handle($bankFile) ;
                ($metabolites) = $oHandler->cowmetdb_hash($handler) ;
                # NOTE(review): $nbEntries is dereferenced here, unlike the XML branch -
                # presumably cowmetdb_handle returns a scalar ref; confirm in hmdb_api
                print "\tExtraction of $$nbEntries metabolites from $bankName CARD file\n" if $VERBOSE > 1 ;
            }
            else {
                #TODO... other formats: $metabolites stays undef and the script stops below
            }
        }
        else {
            print "\t/!\\ The bank does not exist: $bankFile\n" if $VERBOSE > 1 ;
        }

        ## Write outputs !
        print "** Write outputs from HMDB in BiH and Json formats\n" if $VERBOSE > 1 ;

        # Two distinct failures, two distinct messages (a missing -format used to be
        # reported as "No metabolites are extracted")
        if ( !defined $metabolites ) {
            croak "No metabolites are extracted from the $bankName bank file\n" ;
        }
        if ( !defined $format ) {
            croak "No output format is defined (-format) - no output written for the $bankName bank\n" ;
        }

        ## Generation of M+H and M-H masses
        my $ometmz = hmdb_api->new ;
        my $completedMetabolites = $ometmz->setMetaboliteAcurrateMzToModesMz($CONF->{$bankName.'_DB_FORMAT'}, $metabolites, $CONF->{'PROTON_MASS'}, $CONF->{'ELECTRON_MASS'}, 1) ;

        ## tabular output
        if ( ( $format eq 'tabular' ) and ( defined $outTab ) ) {
            print "\tThe tabular output ($outTab) is created...\n" if $VERBOSE > 1 ;
            # sort metabolites
            my $omet = hmdb_api->new ;
            my $sortedMetabolites = undef ;

            if ( $CONF->{$bankName.'_DB_FORMAT'} eq 'XML' ) {
                $sortedMetabolites = $omet->buildMetabolitesArray($completedMetabolites) ;
            }
            elsif ( $CONF->{$bankName.'_DB_FORMAT'} eq 'CARD' ) {
                $sortedMetabolites = $omet->cowmetdb_hash_to_inhouse_format($completedMetabolites) ;
            }

            my $ocsv = csv->new( ) ;
            my $csv = $ocsv->get_csv_object("\t") ;
            # create the ref file for ./bank repo
            $ocsv->write_csv_from_arrays($csv, $bankOutputTabularFile, $sortedMetabolites) ;
            # generate also a copy for user history
            $ocsv->write_csv_from_arrays($csv, $outTab, $sortedMetabolites) ;
        }
        elsif ( defined $outJson ) {
            #TODO... json output
        }

        ## Clean local envt
        # The bank file is removed only once the tabular reference file exists: it is the
        # only thing a later run can rebuild the outputs from without a new download.
        if ( ( -e $bankOutputTabularFile ) and ( -f $bankFile ) ) {
            unlink $bankFile or carp "[WARNING] the bank file $bankFile cannot be removed: $!" ;
        }
    }
} ## END IF defined $bankName
else {
    help() ; # prints the usage and exits
    croak "No bank name and format are defined - Please set one" ; # safety net, not reached while help exits
}

print "\n*************!!End of the job ;-). Thank you for using W4M!!****************\n" if $VERBOSE > 1 ;
### END of main script
250 | |
251 | |
252 | |
253 | |
254 | |
#====================================================================================
# Help subroutine called with -h option (and when no bank name is given)
# number of arguments : 0
# Argument(s)        :
# Return           : none - the usage is printed on STDERR and the process exits with status 1
#====================================================================================
sub help {
    # usage text kept in a here-document; it starts with an empty line and ends with one
    my $usage = <<"END_OF_USAGE" ;

downloader_bank_hmdb.pl

# downloader_bank_hmdb is a script to export specific tissue/matrix bank from HMDB source.
# Input : N/A
# Author : Franck Giacomoni
# Email : fgiacomoni\@inra.fr
# Version : 1.0
# Created : 21/11/2018
USAGE :
downloader_bank_hmdb.pl -bank [SERUM|URINE|CSF|...] -format [tabular|json] -outTab [tabular file name]

END_OF_USAGE

    print STDERR $usage ;
    exit(1) ;
}
277 | |
278 ## END of script - F Giacomoni | |
279 | |
280 __END__ | |
281 | |
282 =head1 NAME | |
283 | |
284 downloader_bank_hmdb.pl is a script to export specific tissue/matrix bank from HMDB source. | |
285 | |
286 =head1 USAGE | |
287 | |
288 downloader_bank_hmdb.pl -bank [serum|urine|...] -format [tabular|json] | |
289 or downloader_bank_hmdb.pl -h | |
290 | |
291 =head1 SYNOPSIS | |
292 | |
293 This script exports a specific tissue/matrix bank from the HMDB source. | |
294 | |
295 =head1 DESCRIPTION | |
296 | |
297 This main program is a Galaxy tool (W4M) allowing the export of a specific tissue/matrix bank from the HMDB source in a tabular format. | |
298 | |
299 =over 4 | |
300 | |
301 =item B<function01> | |
302 | |
303 =item B<function02> | |
304 | |
305 =back | |
306 | |
307 =head1 AUTHOR | |
308 | |
309 Franck Giacomoni E<lt>franck.giacomoni@inra.frE<gt> | |
310 | |
311 =head1 LICENSE | |
312 | |
313 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
314 | |
315 =head1 VERSION | |
316 | |
317 version 1 : 21 / 11 / 2018 | |
318 | |
319 version 2 : ?? | |
320 | |
321 =cut |