Mercurial > repos > fgiacomoni > downloader_bank_hmdb
annotate lib/utils.pm @ 2:be504ccbc41c draft default tip
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
| author | fgiacomoni |
|---|---|
| date | Wed, 30 Nov 2022 16:14:27 +0000 |
| parents | 7c9269bded0e |
| children |
| rev | line source |
|---|---|
| 0 | 1 package utils ; |
| 2 | |
| 3 use strict; | |
| 4 use warnings ; | |
| 5 use Exporter ; | |
| 6 use Carp ; | |
| 7 | |
| 8 use Data::Dumper ; | |
| 9 use LWP::UserAgent ; | |
| 10 use LWP::Simple ; | |
| 11 use HTTP::Status qw(:constants :is status_message); | |
| 12 use Archive::Zip ; | |
| 13 use IO::Uncompress::Gunzip qw(gunzip $GunzipError) ; | |
| 14 | |
| 15 use vars qw($VERSION @ISA @EXPORT %EXPORT_TAGS); | |
| 16 | |
| 17 our $VERSION = "1.0"; | |
| 18 our @ISA = qw(Exporter); | |
| 19 our @EXPORT = qw( getHttpFileVersion getHttpFile unzipFile ); | |
| 20 our %EXPORT_TAGS = ( ALL => [qw( getHttpFileVersion getHttpFile unzipFile )] ); | |
| 21 | |
| 22 =head1 NAME | |
| 23 | |
| 24 My::Module - An example module | |
| 25 | |
| 26 =head1 SYNOPSIS | |
| 27 | |
| 28 use My::Module; | |
| 29 my $object = My::Module->new(); | |
| 30 print $object->as_string; | |
| 31 | |
| 32 =head1 DESCRIPTION | |
| 33 | |
| 34 This module does not really exist, it | |
| 35 was made for the sole purpose of | |
| 36 demonstrating how POD works. | |
| 37 | |
| 38 =head1 METHODS | |
| 39 | |
| 40 Methods are : | |
| 41 | |
| 42 =head2 METHOD new | |
| 43 | |
| 44 ## Description : new | |
| 45 ## Input : $self | |
| 46 ## Ouput : bless $self ; | |
| 47 ## Usage : new() ; | |
| 48 | |
| 49 =cut | |
| 50 | |
| 51 sub new { | |
| 52 ## Variables | |
| 53 my $self={}; | |
| 54 bless($self) ; | |
| 55 return $self ; | |
| 56 } | |
| 57 ### END of SUB | |
| 58 | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
59 =head2 METHOD checkHttpUrl |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
60 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
61 ## Description : check if a http url exists or not and warn/die |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
62 ## Input : $url |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
63 ## Output : $warn |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
64 ## Usage : my ( $warn ) = checkHttpUrl ( $url ) ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
65 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
66 =cut |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
67 ## START of SUB |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
68 sub checkHttpUrl { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
69 ## Retrieve Values |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
70 my $self = shift ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
71 my ( $url ) = @_; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
72 my ( $warn ) = ( undef ) ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
73 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
74 my $ua = new LWP::UserAgent; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
75 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
76 ## just make different not existing/time out |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
77 $ua->timeout(10); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
78 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
79 if ($ua->head($url)) { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
80 print "\t$url DOES EXIST\n"; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
81 } else { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
82 croak "\t$url DOES not exist or timeout\n";; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
83 } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
84 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
85 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
86 return ($warn) ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
87 } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
88 ### END of SUB |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
89 |
| 0 | 90 =head2 METHOD getHttpFileVersion |
| 91 | |
| 92 ## Description : fetch the version of a file from its http header | |
| 93 ## Input : $url | |
| 94 ## Output : $version | |
| 95 ## Usage : $version= getHttpFileVersion($url) ; | |
| 96 | |
| 97 =cut | |
| 98 ## START of SUB | |
| 99 sub getHttpFileVersion { | |
| 100 ## Retrieve Values | |
| 101 my $self = shift ; | |
| 102 my ( $url ) = @_ ; | |
| 103 | |
| 104 my ( $version ) = undef ; | |
| 105 | |
| 106 # based on https://stackoverflow.com/questions/36903905/extract-headers-from-http-request-in-perl | |
| 107 my $ua = new LWP::UserAgent; | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
108 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
109 $ua->from('franck.giacomoni@inrae.fr'); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
110 $ua->ssl_opts(timeout => 100, verify_hostname => 0); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
111 |
| 0 | 112 my $result = $ua->head($url); |
| 113 | |
| 114 for my $header_name ($result->header_field_names) { | |
| 115 | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
116 if ( ($header_name eq 'Last-Modified') or ($header_name eq 'last-Modified') or ($header_name eq 'last-Modified') ) { |
| 0 | 117 # print $result->header($header_name)."\n"; |
| 118 if ($result->header($header_name) =~/[a-z|A-Z]+,\s(.*)\s[0-9]+:[0-9]+:[0-9]+\s[a-z|A-Z]+$/) { | |
| 119 $version = $1 ; | |
| 120 | |
| 121 #version format is DD Month(3letters) YYYY (as "09 Jul 2018") | |
| 122 if ($version =~/([0-9]+)\s([a-z|A-Z]+)\s([0-9]+)/) { | |
| 123 $version = 'v'.$3.$2.$1 ; | |
| 124 } | |
| 125 else{ | |
| 126 warn "the current version format - DD Month(3letters) YYYY - doesn't match with template" ; | |
| 127 } | |
| 128 } | |
| 129 } | |
| 130 else { | |
| 131 next ; | |
| 132 } | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
133 # print "\tVERSION IS: $version\n" ; |
| 0 | 134 } |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
135 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
136 if (!$version) { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
137 croak "\t /!\\ the current version of the db is not findable !!" |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
138 } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
139 |
| 0 | 140 return($version) ; |
| 141 } | |
| 142 ## END of SUB | |
| 143 | |
| 144 =head2 METHOD getHttpFile | |
| 145 | |
| 146 ## Description : fetch a file from http | |
| 147 ## Input : $url, $filename | |
| 148 ## Output : $file | |
| 149 ## Usage : $file= getHttpFile($url, $filename) ; | |
| 150 | |
| 151 =cut | |
| 152 ## START of SUB | |
| 153 sub getHttpFile { | |
| 154 ## Retrieve Values | |
| 155 my $self = shift ; | |
| 156 my ( $url, $fileNameToGet ) = @_ ; | |
| 157 | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
158 my $hstatus = 404 ; # default |
| 0 | 159 |
| 160 if ( (defined $url ) and (defined $fileNameToGet ) ) { | |
| 161 my $ua = LWP::UserAgent->new(); | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
162 $ua->agent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:64.0) Gecko/20100101 Firefox/64.0"); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
163 $ua->from('franck.giacomoni@inrae.fr'); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
164 $ua->ssl_opts(timeout => 100, verify_hostname => 0); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
165 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
166 # my $hstatus = getstore ($url, $fileNameToGet); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
167 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
168 my $response = $ua->get($url); |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
169 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
170 if ($response->is_error) { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
171 $hstatus = $response->status_line ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
172 print "\t\t$hstatus: ", status_message($hstatus), "\n"; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
173 carp $response->status_line, "\n"; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
174 } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
175 else { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
176 $hstatus = $response->status_line ; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
177 } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
178 |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
179 open my $fh, '>', $fileNameToGet or die $!; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
180 binmode $fh; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
181 print $fh $response->decoded_content; |
| 0 | 182 |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
183 # if($hstatus != HTTP_OK) { |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
184 # print "\t\t$hstatus: ", status_message($hstatus), "\n"; |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
185 # } |
|
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
186 # |
| 0 | 187 if (!-e $fileNameToGet) { |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
188 carp "None file (should be $fileNameToGet) was download from the given url($url)\n" ; |
| 0 | 189 } |
| 190 } | |
| 191 | |
|
2
be504ccbc41c
master branch Updating with tag :CI_COMMIT_TAG - - Fxx
fgiacomoni
parents:
0
diff
changeset
|
192 return($hstatus) ; |
| 0 | 193 } |
| 194 ## END of SUB | |
| 195 | |
| 196 =head2 METHOD unzipFile | |
| 197 | |
| 198 ## Description : unzip a wanted file from a zip archive | |
| 199 ## Input : $archive, $filePath, $fileName | |
| 200 ## Output : 1 | |
| 201 ## Usage : unzipFile($archive, $filePath, $fileName) ; | |
| 202 | |
| 203 =cut | |
| 204 sub unzipFile { | |
| 205 ## Retrieve Values | |
| 206 my $self = shift ; | |
| 207 my ($archive, $filePath, $fileName) = @_ ; | |
| 208 | |
| 209 my $zip = Archive::Zip->new($archive); | |
| 210 | |
| 211 if ( (defined $fileName) and (defined $filePath) ) { | |
| 212 | |
| 213 foreach my $file ($zip->members) { | |
| 214 next unless ($file->fileName eq $fileName); | |
| 215 $file->extractToFileNamed($filePath); | |
| 216 } | |
| 217 | |
| 218 croak "There was a problem extracting $fileName from $archive" unless (-e $filePath); | |
| 219 } | |
| 220 else { | |
| 221 croak "the given filePath or the filename are undef\n" ; | |
| 222 } | |
| 223 | |
| 224 return 1; | |
| 225 } | |
| 226 ## END of SUB | |
| 227 | |
| 228 =head2 METHOD gunzipFile | |
| 229 | |
| 230 ## Description : unzip a wanted file from a zip archive | |
| 231 ## Input : $archive, $filePath, $fileName | |
| 232 ## Output : 1 | |
| 233 ## Usage : gunzipFile($archive, $filePath, $fileName) ; | |
| 234 | |
| 235 =cut | |
| 236 sub gunzipFile { | |
| 237 ## Retrieve Values | |
| 238 my $self = shift ; | |
| 239 my ($archive, $filePath, $fileName) = @_ ; | |
| 240 | |
| 241 if ( (defined $fileName) and (defined $filePath) ) { | |
| 242 | |
| 243 gunzip $archive => $filePath | |
| 244 or die "gunzip failed: $GunzipError\n"; | |
| 245 } | |
| 246 else { | |
| 247 croak "the given filePath or the filename are undef\n" ; | |
| 248 } | |
| 249 | |
| 250 return 1; | |
| 251 } | |
| 252 ## END of SUB | |
| 253 | |
| 254 | |
| 255 =head2 METHOD cleanUnzip | |
| 256 | |
| 257 ## Description : clean zip file if the unzip is successfully run | |
| 258 ## Input : $archive, $filePath | |
| 259 ## Output : 1 | |
| 260 ## Usage : unzipFile($archive, $want, $dir) ; | |
| 261 | |
| 262 =cut | |
| 263 sub cleanUnzip { | |
| 264 ## Retrieve Values | |
| 265 my $self = shift ; | |
| 266 my ($archive, $filePath ) = @_ ; | |
| 267 | |
| 268 if ( (defined $archive) and (defined $filePath) ) { | |
| 269 | |
| 270 croak "There was a problem extracting $filePath from $archive" unless (-e $filePath); | |
| 271 unlink $archive ; | |
| 272 } | |
| 273 else { | |
| 274 croak "Given filePath or the archive are undef\n" ; | |
| 275 } | |
| 276 | |
| 277 return 1; | |
| 278 } | |
| 279 ## END of SUB | |
| 280 | |
| 281 1 ; | |
| 282 | |
| 283 | |
| 284 __END__ | |
| 285 | |
| 286 =head1 SUPPORT | |
| 287 | |
| 288 You can find documentation for this module with the perldoc command. | |
| 289 | |
| 290 perldoc XXX.pm | |
| 291 | |
| 292 =head1 Exports | |
| 293 | |
| 294 =over 4 | |
| 295 | |
| 296 =item :ALL is ... | |
| 297 | |
| 298 =back | |
| 299 | |
| 300 =head1 AUTHOR | |
| 301 | |
| 302 Franck Giacomoni E<lt>franck.giacomoni@clermont.inra.frE<gt> | |
| 303 | |
| 304 =head1 LICENSE | |
| 305 | |
| 306 This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
| 307 | |
| 308 =head1 VERSION | |
| 309 | |
| 310 version 1 : xx / xx / 201x | |
| 311 | |
| 312 version 2 : ?? | |
| 313 | |
| 314 =cut |
