comparison ensemb_rep_gvl-2bc9b66ada89/variant_effect_predictor/INSTALL.pl @ 0:9c00df4b30f5 draft default tip

Uploaded
author alonie
date Thu, 06 Jun 2013 00:49:58 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9c00df4b30f5
1 #!/usr/bin/perl
2
3 use Getopt::Long;
4 use LWP::Simple qw($ua getstore get);
5 use File::Listing qw(parse_dir);
6
# Unbuffer STDOUT so prompts and the progress bar appear immediately.
$| = 1;
our $VERSION = 2.6;

# CONFIGURE
###########

# Settings that can come from the command line; anything still undefined
# after option parsing receives a default further down.
my ($DEST_DIR, $ENS_CVS_ROOT, $API_VERSION, $BIOPERL_URL, $CACHE_URL, $CACHE_DIR, $help);

# Stop on unknown or malformed options: this script later deletes
# directories, so carrying on with defaults the user did not ask for is unsafe.
GetOptions(
  'DESTDIR|d=s'  => \$DEST_DIR,
  'VERSION|v=i'  => \$API_VERSION,
  'BIOPERL|b=s'  => \$BIOPERL_URL,
  'CACHEURL|u=s' => \$CACHE_URL,
  'CACHEDIR|c=s' => \$CACHE_DIR,
  'HELP|h'       => \$help
) or do {
  usage();
  exit(1);
};

if(defined($help)) {
  usage();
  exit(0);
}

# Remember whether the user chose the install directory; the SETUP stage asks
# for an extra confirmation before deleting a non-default directory.
my $default_dir_used;

# check if $DEST_DIR is default
if(defined($DEST_DIR)) {
  print "Using non-default installation directory $DEST_DIR - you will probably need to add $DEST_DIR to your PERL5LIB\n";
  $default_dir_used = 0;
}
else {
  $DEST_DIR ||= '.';
  $default_dir_used = 1;
}

my $lib_dir = $DEST_DIR;

# the API modules are installed below a Bio/ directory inside the chosen dir
$DEST_DIR     .= '/Bio';
$ENS_CVS_ROOT ||= 'http://cvs.sanger.ac.uk/cgi-bin/viewvc.cgi/';
$BIOPERL_URL  ||= 'http://bioperl.org/DIST/old_releases/bioperl-1.2.3.tar.gz';
$API_VERSION  ||= 68;
$CACHE_URL    ||= "ftp://ftp.ensembl.org/pub/release-$API_VERSION/variation/VEP";
$CACHE_DIR    ||= $ENV{HOME}.'/.vep';

# last state drawn by progress(); reset to 0 after each unpacked archive
our $prev_progress = 0;

print "\nHello! This installer is configured to install v$API_VERSION of the Ensembl API for use by the VEP.\nIt will not affect any existing installations of the Ensembl API that you may have.\n\nIt will also download and install cache files from Ensembl's FTP server.\n\n";
53
54
# CHECK EXISTING
################

print "Checking for installed versions of the Ensembl API...";

# Which of the API modules used by the VEP can already be loaded.
my $has_api = {
  'ensembl'               => 0,
  'ensembl-variation'     => 0,
  'ensembl-functgenomics' => 0,
};

my $installed_version;

# Each probe tries to load one class from the module; a failed load simply
# means that module is not installed, so the error itself is discarded.
if(eval { require Bio::EnsEMBL::Registry; 1 }) {
  $has_api->{ensembl} = 1;

  $installed_version = Bio::EnsEMBL::Registry->software_version;
}

$has_api->{'ensembl-variation'}     = 1 if eval { require Bio::EnsEMBL::Variation::Utils::VEP; 1 };
$has_api->{'ensembl-functgenomics'} = 1 if eval { require Bio::EnsEMBL::Funcgen::RegulatoryFeature; 1 };

print "done\n";

my $total = 0;
$total += $_ for values %$has_api;

# Stays undefined when no API module was found, in which case the
# installation goes ahead without asking.
my $message;

if($total == 3) {

  if(defined($installed_version)) {
    if($installed_version == $API_VERSION) {
      $message = "It looks like you already have v$API_VERSION of the API installed.\nYou shouldn't need to install the API";
    }

    elsif($installed_version > $API_VERSION) {
      $message = "It looks like this installer is for an older distribution of the API than you already have";
    }

    else {
      $message = "It looks like you have an older version (v$installed_version) of the API installed.\nThis installer will install a limited set of the API v$API_VERSION for use by the VEP only";
    }
  }

  else {
    $message = "It looks like you have an unidentified version of the API installed.\nThis installer will install a limited set of the API v$API_VERSION for use by the VEP only"
  }
}

elsif($total > 0) {
  # sorted so the modules are always listed in the same order
  $message = "It looks like you already have the following API modules installed:\n\n".(join "\n", grep {$has_api->{$_}} sort keys %$has_api)."\n\nThe VEP requires the ensembl, ensembl-variation and optionally ensembl-functgenomics modules";
}

if(defined($message)) {
  print "$message\n\nAre you sure you want to continue installing the API (y/n)? ";

  # Read the answer from STDIN explicitly: a bare <> would try to open any
  # words left on the command line as input files.
  my $ok = <STDIN>;

  # anything but an answer starting with "y" (including end-of-input) skips
  # straight to the cache stage
  unless(defined($ok) && $ok =~ /^y/i) {
    print " - skipping API installation\n";
    goto CACHE;
  }
}
134
135
136
# SETUP
#######

print "\nSetting up directories\n";

# check if install dir exists
if(-e $DEST_DIR) {
  print "Destination directory $DEST_DIR already exists.\nDo you want to overwrite it (if updating VEP this is probably OK) (y/n)? ";

  # Answers are read from STDIN explicitly: a bare <> would try to open any
  # words left on the command line as input files.
  my $ok = <STDIN>;

  unless(defined($ok) && $ok =~ /^y/i) {
    print "Exiting\n";
    exit(0);
  }

  # a directory the user picked gets a second confirmation before deletion
  unless($default_dir_used) {
    print "WARNING: You are using a non-default install directory.\nPressing \"y\" again will remove $DEST_DIR and its contents!!!\nAre you really, really sure (y/n)? ";
    $ok = <STDIN>;

    unless(defined($ok) && $ok =~ /^y/i) {
      print "Exiting\n";
      exit(0);
    }
  }

  # Delete the existing dir. The list form of system() hands the path to rm
  # as a single argument, so spaces or shell metacharacters in a
  # user-supplied directory cannot change what is removed.
  system('rm', '-rf', '--', $DEST_DIR) == 0
    or die "ERROR: Could not remove directory $DEST_DIR\n";
}

mkdir($DEST_DIR) or die "ERROR: Could not make directory $DEST_DIR: $!\n";
mkdir($DEST_DIR.'/tmp') or die "ERROR: Could not make directory $DEST_DIR/tmp: $!\n";

# set up a user agent's proxy
$ua->env_proxy;

# Enable progress. The call is wrapped in eval so that a failure here
# (presumably a user agent without show_progress - TODO confirm) is ignored.
eval { $ua->show_progress(1) };
179
180
181
# API
#####

print "\nDownloading required files\n";

# set up the URLs
my $ensembl_url_tail = '.tar.gz?root=ensembl&view=tar&only_with_tag=branch-ensembl-';

# For each module: the directory to take out of the unpacked archive and the
# directory it is moved into.
my %move_for = (
  'ensembl'               => ['modules/Bio/EnsEMBL',           "$DEST_DIR/"],
  'ensembl-variation'     => ['modules/Bio/EnsEMBL/Variation', "$DEST_DIR/EnsEMBL/"],
  'ensembl-functgenomics' => ['modules/Bio/EnsEMBL/Funcgen',   "$DEST_DIR/EnsEMBL/"],
);

# 'ensembl' must come first: it provides the EnsEMBL directory that the other
# two modules are moved into.
foreach my $module(qw(ensembl ensembl-variation ensembl-functgenomics)) {
  my $url = $ENS_CVS_ROOT.$module.$ensembl_url_tail.$API_VERSION;

  print " - fetching $module\n";

  my $target_file = $DEST_DIR.'/tmp/'.$module.'.tar.gz';

  # keep the status returned by getstore so the error message can report it
  my $response = getstore($url, $target_file);

  unless($response == 200) {
    die "ERROR: Failed to fetch $module from $url - perhaps you have a proxy/firewall? Set the http_proxy ENV variable if you do\nError code: $response\n";
  }

  print " - unpacking $target_file\n";
  unpack_tar($target_file, "$DEST_DIR/tmp/");

  print " - moving files\n";

  # List-form system() bypasses the shell, so unusual characters in the
  # install path are passed through untouched; a failed move is fatal because
  # the installed API would be incomplete.
  my ($source, $destination) = @{$move_for{$module}};
  system('mv', '-f', "$DEST_DIR/tmp/$module/$source", $destination) == 0
    or die "ERROR: Failed to move $DEST_DIR/tmp/$module/$source to $destination\n";

  # left-over unpacked files are only clutter, so a failed cleanup just warns
  system('rm', '-rf', '--', "$DEST_DIR/tmp/$module") == 0
    or warn "WARNING: Failed to remove directory $DEST_DIR/tmp/$module\n";
}
218
219
220
# BIOPERL
#########

# now get BioPerl
print " - fetching BioPerl\n";

# the archive is stored under the last path component of the URL
my $bioperl_file = (split /\//, $BIOPERL_URL)[-1];

# The unpacked directory is taken to be the archive name without ".tar.gz".
# Check this before downloading: with no match the move and cleanup below
# would operate on "$DEST_DIR/tmp//...".
my ($bioperl_dir) = $bioperl_file =~ /(bioperl.+?)\.tar\.gz/
  or die "ERROR: Cannot work out the BioPerl directory name from $BIOPERL_URL - expected a file named bioperl*.tar.gz\n";

my $target_file = $DEST_DIR.'/tmp/'.$bioperl_file;

# keep the status returned by getstore so the error message can report it
my $response = getstore($BIOPERL_URL, $target_file);

unless($response == 200) {
  die "ERROR: Failed to fetch BioPerl from $BIOPERL_URL - perhaps you have a proxy/firewall?\nError code: $response\n";
}

print " - unpacking $target_file\n";
unpack_tar($target_file, "$DEST_DIR/tmp/");

print " - moving files\n";

# Collect the entries of the unpacked Bio/ directory ourselves (skipping
# dot-files, as a shell "*" would) so that mv can be run without a shell.
my $bio_source = "$DEST_DIR/tmp/$bioperl_dir/Bio";
opendir(my $bio_dh, $bio_source) or die "ERROR: Could not read directory $bio_source: $!\n";
my @bio_entries = map { "$bio_source/$_" } grep { !/^\./ } readdir($bio_dh);
closedir($bio_dh);

die "ERROR: No files found in $bio_source\n" unless @bio_entries;

system('mv', '-f', @bio_entries, "$DEST_DIR/") == 0
  or die "ERROR: Failed to move BioPerl modules from $bio_source to $DEST_DIR/\n";

# removing the temporary directory also removes the unpacked BioPerl tree
system('rm', '-rf', '--', "$DEST_DIR/tmp") == 0
  or warn "WARNING: Failed to remove directory $DEST_DIR/tmp\n";
245
246
247
# TEST
######

print "\nTesting VEP script\n";

# Run the VEP script with --help, capturing STDOUT and STDERR together, and
# treat the installation as working only if its banner shows up in the output.
my $test_vep = qx{perl variant_effect_predictor.pl --help 2>&1};

unless($test_vep =~ /ENSEMBL VARIANT EFFECT PREDICTOR/) {
  die "ERROR: Testing VEP script failed with the following error\n$test_vep\n";
}

print " - OK!\n";
258
259
260
# CACHE FILES
#############

# The API stage jumps here when the user declines to (re)install the API.
CACHE:

print "\nThe VEP can either connect to remote or local databases, or use local cache files.\n";
print "Cache files will be stored in $CACHE_DIR\n";
print "Do you want to install any cache files (y/n)? ";

# Answers are read from STDIN explicitly: a bare <> would try to open any
# words left on the command line as input files.
my $ok = <STDIN>;

unless(defined($ok) && $ok =~ /^y/i) {
  print "Exiting\n";
  exit(0);
}

# check cache dir exists
if(!(-e $CACHE_DIR)) {
  print "Cache directory $CACHE_DIR does not exists - do you want to create it (y/n)? ";

  my $ok = <STDIN>;

  unless(defined($ok) && $ok =~ /^y/i) {
    print "Exiting\n";
    exit(0);
  }

  mkdir($CACHE_DIR) or die "ERROR: Could not create directory $CACHE_DIR: $!\n";
}

unless(-e $CACHE_DIR.'/tmp') {
  mkdir($CACHE_DIR.'/tmp') or die "ERROR: Could not create directory $CACHE_DIR/tmp: $!\n";
}

# get list of species
print "\nDownloading list of available cache files\n";

# get() returns undef when the listing cannot be fetched; in that case the
# predefined list below is used instead
my $listing = get($CACHE_URL);
my @files;

if(defined($listing)) {
  @files = map {$_->[0]} grep {$_->[0] =~ /\.tar\.gz$/} @{parse_dir($listing)};
}

# if we don't have a species list, we'll have to guess
if(!scalar(@files)) {
  print "Could not get current species list - using predefined list instead\n";

  @files = (
    "bos_taurus_vep_$API_VERSION.tar.gz",
    "danio_rerio_vep_$API_VERSION.tar.gz",
    "homo_sapiens_vep_$API_VERSION.tar.gz",
    "homo_sapiens_vep_$API_VERSION\_sift_polyphen.tar.gz",
    "mus_musculus_vep_$API_VERSION.tar.gz",
    "rattus_norvegicus_vep_$API_VERSION.tar.gz",
  );
}

# numbered menu, counting from 1
my $species_list = join '', map { ($_ + 1)." : ".$files[$_]."\n" } 0 .. $#files;

print "The following species/files are available; which do you want (can specify multiple separated by spaces): \n$species_list\n? ";

my $selection = <STDIN>;
$selection = '' unless defined($selection);

# split ' ' ignores leading whitespace, so no empty first entry is produced
foreach my $choice(split ' ', $selection) {

  # Only accept a number from the menu. Without this check "0" or a
  # non-number would silently select the LAST file (index -1) and an
  # out-of-range number would select nothing at all.
  unless($choice =~ /^\d+$/ && $choice >= 1 && $choice <= scalar(@files)) {
    print " - skipping invalid selection \"$choice\"\n";
    next;
  }

  my $file_path = $files[$choice - 1];

  my ($species, $file_name);

  # entries of the form "species/file" carry the species explicitly,
  # otherwise it is taken from the start of the file name
  if($file_path =~ /\//) {
    ($species, $file_name) = (split /\//, $file_path);
  }
  else {
    $file_name = $file_path;
    ($species) = $file_name =~ m/^(\w+?\_\w+?)\_vep/;
  }

  # the species name is used to build paths that get deleted and replaced
  # below, so never continue without one
  unless(defined($species) && length($species) && defined($file_name) && length($file_name)) {
    print " - skipping $file_path: could not work out the species name\n";
    next;
  }

  # check if user already has this species and version
  if(-e "$CACHE_DIR/$species/$API_VERSION") {
    print "\nWARNING: It looks like you already have the cache for $species (v$API_VERSION) installed.\nIf you continue the existing cache will be overwritten.\nAre you sure you want to continue (y/n)? ";

    my $ok = <STDIN>;

    unless(defined($ok) && $ok =~ /^y/i) {
      print " - skipping $species\n";
      next;
    }

    # list-form system() bypasses the shell, so the path is passed as is
    system('rm', '-rf', '--', "$CACHE_DIR/$species/$API_VERSION") == 0
      or die "ERROR: Could not remove directory $CACHE_DIR/$species/$API_VERSION\n";
  }

  my $target_file = "$CACHE_DIR/tmp/$file_name";

  print " - downloading $CACHE_URL/$file_path\n";

  # keep the status returned by getstore so the error message can report it
  my $response = getstore("$CACHE_URL/$file_path", $target_file);

  unless($response == 200) {
    die "ERROR: Failed to fetch cache file $file_name from $CACHE_URL/$file_path - perhaps you have a proxy/firewall? Set the http_proxy ENV variable if you do\nError code: $response\n";
  }

  print " - unpacking $file_name\n";

  unpack_tar($target_file, $CACHE_DIR.'/tmp/');

  # does species dir exist?
  if(!-e "$CACHE_DIR/$species") {
    mkdir("$CACHE_DIR/$species") or die "ERROR: Could not create directory $CACHE_DIR/$species: $!\n";
  }

  # move files
  system('mv', '-f', "$CACHE_DIR/tmp/$species/$API_VERSION", "$CACHE_DIR/$species/") == 0
    or die "ERROR: Failed to move $CACHE_DIR/tmp/$species/$API_VERSION to $CACHE_DIR/$species/\n";
}

# cleanup
system('rm', '-rf', '--', "$CACHE_DIR/tmp") == 0
  or warn "WARNING: Failed to remove directory $CACHE_DIR/tmp\n";

print "\nSuccess\n";
374
375
376 # SUBS
377 ######
378
# unpack a tarball with progress
#
# Arguments: $file - path of the .tar.gz archive
#            $dir  - directory to extract into
#
# The archive is listed once to count its entries, then extracted while the
# progress bar is advanced once per line of tar output. The archive file is
# deleted after a successful extraction. Dies if tar cannot be started or if
# the extraction reports a failure.
sub unpack_tar {
  my ($file, $dir) = @_;

  # first pass: count the lines tar prints, to scale the progress bar
  my $count = 0;
  _tar_each_line(sub { $count++ }, '-tzvf', $file);

  # second pass: extract; progress() is skipped when the count is unknown
  # because it divides by the total
  my $i = 0;
  my $extracted = _tar_each_line(
    sub {
      progress($i, $count) if $count > 0;
      $i++;
    },
    '-C', $dir, '-xzvf', $file
  );

  # finish the bar and reset the shared state for the next archive
  progress(1,1);
  $prev_progress = 0;
  print "\n";

  die "ERROR: Failed to unpack $file into $dir\n" unless $extracted;

  unlink($file);
}

# Run tar with the given arguments, without going through a shell, and call
# $callback once for every line it prints on STDOUT or STDERR. Returns true
# if tar exited successfully.
sub _tar_each_line {
  my ($callback, @tar_args) = @_;

  # forking open: the child's STDOUT becomes the pipe read by the parent
  my $pid = open(my $tar_out, '-|');
  die "ERROR: Could not fork to run tar: $!\n" unless defined($pid);

  if($pid == 0) {
    # child: merge STDERR into the pipe (as "2>&1" would), then become tar
    open(STDERR, '>&', \*STDOUT);
    exec { 'tar' } 'tar', @tar_args;

    # only reached when exec itself failed
    print STDERR "ERROR: Could not run tar: $!\n";
    exit(127);
  }

  while(my $line = <$tar_out>) {
    $callback->($line);
  }

  # close() waits for tar and is false when it exited with an error
  return close($tar_out) ? 1 : 0;
}
390 open EXTRACT, "tar -C $dir -xzvf $file 2>&1 |";
391 progress($i++, $count) while(<EXTRACT>);
392 close EXTRACT;
393
394 progress(1,1);
395 $prev_progress = 0;
396 print "\n";
397
398 `rm -rf $file`;
399 }
400
# update or initiate progress bar
#
# Arguments: $i     - number of items processed so far
#            $total - number of items expected
#
# Draws a 60-column bar followed by a percentage, on the current line of the
# selected output handle. Nothing is drawn when $total is not positive, or
# when the line would be identical to the one drawn last (remembered in the
# global $prev_progress, which callers reset to 0 to start a new bar).
sub progress {
  my ($i, $total) = @_;

  # no sensible bar without a positive total; this also avoids dividing by zero
  return unless defined($total) && $total > 0;

  # keep the completed fraction within 0..1 so the bar can neither get a
  # negative length nor run past its width
  my $fraction = $i / $total;
  $fraction = 0 if $fraction < 0;
  $fraction = 1 if $fraction > 1;

  my $width = 60;
  my $percent = int($fraction * 100);

  # two columns are reserved for the opening bracket and the bar's tip
  my $numblobs = int($fraction * $width) - 2;
  $numblobs = 0 if $numblobs < 0;

  # the tip is '>' while in progress and '=' once complete
  my $line = sprintf("\r% -${width}s% 1s% 10s", '['.('=' x $numblobs).($fraction == 1 ? '=' : '>'), ']', "[ " . $percent . "% ]");

  # redraw only when something visible has changed
  return if $line eq $prev_progress;
  $prev_progress = $line;

  print $line;
}
414
# Print the installer's help text (version, author, documentation URL and
# the command-line options) to the selected output handle.
sub usage {
  # The heredoc interpolates, so $VERSION is filled in; the "@" in the e-mail
  # address and the "$" in $HOME are escaped to be printed literally.
  my $usage =<<END;
#---------------#
# VEP INSTALLER #
#---------------#

version $VERSION

By Will McLaren (wm2\@ebi.ac.uk)

http://www.ensembl.org/info/docs/variation/vep/vep_script.html#installer

Usage:
perl INSTALL.pl [arguments]

Options
=======

-h | --help        Display this message and quit
-d | --DESTDIR     Set destination directory for API install (default = './')
-v | --VERSION     Set API version to install (default = 68)
-b | --BIOPERL     Set URL of the BioPerl archive to download (default = http://bioperl.org/DIST/old_releases/bioperl-1.2.3.tar.gz)
-u | --CACHEURL    Set base URL for cache file downloads (default = ftp://ftp.ensembl.org/pub/release-[version]/variation/VEP)
-c | --CACHEDIR    Set destination directory for cache files (default = '\$HOME/.vep/')
END

  print $usage;
}