Mercurial > repos > fgiacomoni > downloader_bank_hmdb
annotate lib/utils.pm @ 2:be504ccbc41c draft default tip
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
author | fgiacomoni |
---|---|
date | Wed, 30 Nov 2022 16:14:27 +0000 |
parents | 7c9269bded0e |
children |
rev | line source |
---|---|
0 | 1 package utils ; |
2 | |
3 use strict; | |
4 use warnings ; | |
5 use Exporter ; | |
6 use Carp ; | |
7 | |
8 use Data::Dumper ; | |
9 use LWP::UserAgent ; | |
10 use LWP::Simple ; | |
11 use HTTP::Status qw(:constants :is status_message); | |
12 use Archive::Zip ; | |
13 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; | |
14 | |
15 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); | |
16 | |
17 our $VERSION = "1.0"; | |
18 our @ISA = qw(Exporter); | |
19 our @EXPORT = qw( getHttpFileVersion getHttpFile unzipFile ); | |
20 our %EXPORT_TAGS = ( ALL => [qw( getHttpFileVersion getHttpFile unzipFile )] ); | |
21 | |
22 =head1 NAME | |
23 | |
24 My::Module - An example module | |
25 | |
26 =head1 SYNOPSIS | |
27 | |
28 use My::Module; | |
29 my $object = My::Module->new(); | |
30 print $object->as_string; | |
31 | |
32 =head1 DESCRIPTION | |
33 | |
34 This module does not really exist, it | |
35 was made for the sole purpose of | |
36 demonstrating how POD works. | |
37 | |
38 =head1 METHODS | |
39 | |
40 Methods are : | |
41 | |
42 =head2 METHOD new | |
43 | |
44 ## Description : new | |
45 ## Input : $self | |
46 ## Ouput : bless $self ; | |
47 ## Usage : new() ; | |
48 | |
49 =cut | |
50 | |
51 sub new { | |
52 ## Variables | |
53 my $self={}; | |
54 bless($self) ; | |
55 return $self ; | |
56 } | |
57 ### END of SUB | |
58 | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
59 =head2 METHOD checkHttpUrl |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
60 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
61 ## Description : check if a http url exists or not and warn/die |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
62 ## Input : $url |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
63 ## Output : $warn |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
64 ## Usage : my ( $warn ) = checkHttpUrl ( $url ) ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
65 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
66 =cut |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
67 ## START of SUB |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
68 sub checkHttpUrl { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
69 ## Retrieve Values |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
70 my $self = shift ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
71 my ( $url ) = @_; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
72 my ( $warn ) = ( undef ) ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
73 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
74 my $ua = new LWP::UserAgent; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
75 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
76 ## just make different not existing/time out |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
77 $ua->timeout(10); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
78 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
79 if ($ua->head($url)) { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
80 print "\t$url DOES EXIST\n"; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
81 } else { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
82 croak "\t$url DOES not exist or timeout\n";; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
83 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
84 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
85 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
86 return ($warn) ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
87 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
88 ### END of SUB |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
89 |
0 | 90 =head2 METHOD getHttpFileVersion |
91 | |
92 ## Description : fetch the version of a file from its http header | |
93 ## Input : $url | |
94 ## Output : $version | |
95 ## Usage : $version= getHttpFileVersion($url) ; | |
96 | |
97 =cut | |
98 ## START of SUB | |
99 sub getHttpFileVersion { | |
100 ## Retrieve Values | |
101 my $self = shift ; | |
102 my ( $url ) = @_ ; | |
103 | |
104 my ( $version ) = undef ; | |
105 | |
106 # based on https://stackoverflow.com/questions/36903905/extract-headers-from-http-request-in-perl | |
107 my $ua = new LWP::UserAgent; | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
108 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
109 $ua->from('franck.giacomoni@inrae.fr'); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
110 $ua->ssl_opts(timeout => 100, verify_hostname => 0); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
111 |
0 | 112 my $result = $ua->head($url); |
113 | |
114 for my $header_name ($result->header_field_names) { | |
115 | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
116 if ( ($header_name eq 'Last-Modified') or ($header_name eq 'last-Modified') or ($header_name eq 'last-Modified') ) { |
0 | 117 # print $result->header($header_name)."\n"; |
118 if ($result->header($header_name) =~/[a-z|A-Z]+,\s(.*)\s[0-9]+:[0-9]+:[0-9]+\s[a-z|A-Z]+$/) { | |
119 $version = $1 ; | |
120 | |
121 #version format is DD Month(3letters) YYYY (as "09 Jul 2018") | |
122 if ($version =~/([0-9]+)\s([a-z|A-Z]+)\s([0-9]+)/) { | |
123 $version = 'v'.$3.$2.$1 ; | |
124 } | |
125 else{ | |
126 warn "the current version format - DD Month(3letters) YYYY - doesn't match with template" ; | |
127 } | |
128 } | |
129 } | |
130 else { | |
131 next ; | |
132 } | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
133 # print "\tVERSION IS: $version\n" ; |
0 | 134 } |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
135 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
136 if (!$version) { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
137 croak "\t /!\\ the current version of the db is not findable !!" |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
138 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
139 |
0 | 140 return($version) ; |
141 } | |
142 ## END of SUB | |
143 | |
144 =head2 METHOD getHttpFile | |
145 | |
146 ## Description : fetch a file from http | |
147 ## Input : $url, $filename | |
148 ## Output : $file | |
149 ## Usage : $file= getHttpFile($url, $filename) ; | |
150 | |
151 =cut | |
152 ## START of SUB | |
153 sub getHttpFile { | |
154 ## Retrieve Values | |
155 my $self = shift ; | |
156 my ( $url, $fileNameToGet ) = @_ ; | |
157 | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
158 my $hstatus = 404 ; # default |
0 | 159 |
160 if ( (defined $url ) and (defined $fileNameToGet ) ) { | |
161 my $ua = LWP::UserAgent->new(); | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
162 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
163 $ua->from('franck.giacomoni@inrae.fr'); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
164 $ua->ssl_opts(timeout => 100, verify_hostname => 0); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
165 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
166 # my $hstatus = getstore ($url, $fileNameToGet); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
167 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
168 my $response = $ua->get($url); |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
169 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
170 if ($response->is_error) { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
171 $hstatus = $response->status_line ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
172 print "\t\t$hstatus: ", status_message($hstatus), "\n"; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
173 carp $response->status_line, "\n"; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
174 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
175 else { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
176 $hstatus = $response->status_line ; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
177 } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
178 |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
179 open my $fh, '>', $fileNameToGet or die $!; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
180 binmode $fh; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
181 print $fh $response->decoded_content; |
0 | 182 |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
183 # if($hstatus != HTTP_OK) { |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
184 # print "\t\t$hstatus: ", status_message($hstatus), "\n"; |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
185 # } |
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
186 # |
0 | 187 if (!-e $fileNameToGet) { |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
188 carp "None file (should be $fileNameToGet) was download from the given url($url)\n" ; |
0 | 189 } |
190 } | |
191 | |
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
192 return($hstatus) ; |
0 | 193 } |
194 ## END of SUB | |
195 | |
196 =head2 METHOD unzipFile | |
197 | |
198 ## Description : unzip a wanted file from a zip archive | |
199 ## Input : $archive, $filePath, $fileName | |
200 ## Output : 1 | |
201 ## Usage : unzipFile($archive, $filePath, $fileName) ; | |
202 | |
203 =cut | |
204 sub unzipFile { | |
205 ## Retrieve Values | |
206 my $self = shift ; | |
207 my ($archive, $filePath, $fileName) = @_ ; | |
208 | |
209 my $zip = Archive::Zip->new($archive); | |
210 | |
211 if ( (defined $fileName) and (defined $filePath) ) { | |
212 | |
213 foreach my $file ($zip->members) { | |
214 next unless ($file->fileName eq $fileName); | |
215 $file->extractToFileNamed($filePath); | |
216 } | |
217 | |
218 croak "There was a problem extracting $fileName from $archive" unless (-e $filePath); | |
219 } | |
220 else { | |
221 croak "the given filePath or the filename are undef\n" ; | |
222 } | |
223 | |
224 return 1; | |
225 } | |
226 ## END of SUB | |
227 | |
228 =head2 METHOD gunzipFile | |
229 | |
230 ## Description : unzip a wanted file from a zip archive | |
231 ## Input : $archive, $filePath, $fileName | |
232 ## Output : 1 | |
233 ## Usage : gunzipFile($archive, $filePath, $fileName) ; | |
234 | |
235 =cut | |
236 sub gunzipFile { | |
237 ## Retrieve Values | |
238 my $self = shift ; | |
239 my ($archive, $filePath, $fileName) = @_ ; | |
240 | |
241 if ( (defined $fileName) and (defined $filePath) ) { | |
242 | |
243 gunzip $archive => $filePath | |
244 or die "gunzip failed: $GunzipError\n"; | |
245 } | |
246 else { | |
247 croak "the given filePath or the filename are undef\n" ; | |
248 } | |
249 | |
250 return 1; | |
251 } | |
252 ## END of SUB | |
253 | |
254 | |
255 =head2 METHOD cleanUnzip | |
256 | |
257 ## Description : clean zip file if the unzip is successfully run | |
258 ## Input : $archive, $filePath | |
259 ## Output : 1 | |
260 ## Usage : unzipFile($archive, $want, $dir) ; | |
261 | |
262 =cut | |
263 sub cleanUnzip { | |
264 ## Retrieve Values | |
265 my $self = shift ; | |
266 my ($archive, $filePath ) = @_ ; | |
267 | |
268 if ( (defined $archive) and (defined $filePath) ) { | |
269 | |
270 croak "There was a problem extracting $filePath from $archive" unless (-e $filePath); | |
271 unlink $archive ; | |
272 } | |
273 else { | |
274 croak "Given filePath or the archive are undef\n" ; | |
275 } | |
276 | |
277 return 1; | |
278 } | |
279 ## END of SUB | |
280 | |
281 1 ; | |
282 | |
283 | |
284 __END__ | |
285 | |
286 =head1 SUPPORT | |
287 | |
288 You can find documentation for this module with the perldoc command. | |
289 | |
290 perldoc XXX.pm | |
291 | |
292 =head1 Exports | |
293 | |
294 =over 4 | |
295 | |
296 =item :ALL is ... | |
297 | |
298 =back | |
299 | |
300 =head1 AUTHOR | |
301 | |
302 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
303 | |
304 =head1 LICENSE | |
305 | |
306 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
307 | |
308 =head1 VERSION | |
309 | |
310 version 1 : xx / xx / 201x | |
311 | |
312 version 2 : ?? | |
313 | |
314 =cut |