annotate bulk_download.xml @ 3:4d03df88688d draft default tip

Uploaded
author kellrott
date Tue, 24 Jul 2012 17:42:17 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
4d03df88688d Uploaded
kellrott
parents:
diff changeset
<tool id="bulk_download" name="Bulk Download" version="1.0">
  <description>Bulk Downloader</description>
  <!--
    Runs the generated script below with three arguments: the output dataset
    path, the output dataset id and the new-file directory.
  -->
  <command interpreter="python">$script_file $output $output.id $__new_file_path__</command>
  <inputs>
    <param name="urls_txt" type="text" area="True" size="5x35" label="URL Text" optional="True"/>
    <param name="urls_file" type="data" label="URL File" optional="True"/>
    <param name="decompress" type="boolean" label="Decompress" value="true"/>

  </inputs>
  <outputs>
    <data name="output"/>
  </outputs>
  <!--
    The script is a template: the three placeholder strings near its top are
    filled in from the tool form before the script is executed.
  -->
  <configfiles>
    <configfile name="script_file"><![CDATA[#!/usr/bin/env python
"""Download the URLs given on the tool form into the output dataset.

Usage: script_file OUTPUT OUTPUT_ID NEW_FILE_PATH

URLs are read one per line from the URL file when one was supplied,
otherwise from the URL text box. A download whose name ends in ".gz" is
gunzipped into the output when the Decompress option is on; every other
download is moved into place unchanged.
"""
import contextlib
import gzip
import os
import shutil
import sys
import tempfile
import urllib

try:
    # Python 3 location of urlretrieve.
    from urllib.request import urlretrieve
except ImportError:
    # Python 2 location of urlretrieve.
    urlretrieve = urllib.urlretrieve

# Template placeholders, substituted before the script runs.
urls_txt = """${urls_txt}"""
urls_file = """${urls_file}"""
decompress = "${decompress}"


def read_urls():
    """Return the list of non-blank URLs, one per input line.

    The URL file wins over the URL text when it is set; an unset file
    parameter is rendered as the string "None".
    """
    if len(urls_file) and urls_file != "None":
        with open(urls_file) as handle:
            lines = handle.readlines()
    else:
        lines = urls_txt.splitlines()
    # Blank lines would otherwise be passed to urlretrieve as empty URLs.
    return [line.strip() for line in lines if line.strip()]


def fetch(url, opath):
    """Download url to a temporary file, then write the result to opath.

    Raises whatever urlretrieve, gzip or the file operations raise; the
    temporary file is removed in every case.
    """
    base = os.path.basename(url)
    h, path = tempfile.mkstemp(dir="./")
    os.close(h)
    try:
        urlretrieve(url, path)
        if decompress == "true" and base.endswith(".gz"):
            # copyfileobj stops at end of file for both bytes and str
            # reads, unlike a hand-written loop with a '' sentinel.
            with contextlib.closing(gzip.open(path, "rb")) as src:
                with open(opath, "wb") as dst:
                    shutil.copyfileobj(src, dst)
        else:
            # Not gzip-compressed, or decompression is off: keep as is.
            shutil.move(path, opath)
    finally:
        # Still present after a gunzip or after a failure part-way through.
        if os.path.exists(path):
            os.unlink(path)


def main(argv):
    """Fetch every requested URL into the output path given in argv."""
    output = argv[1]
    # Accepted to keep the command line contract; not used yet.
    output_id = argv[2]
    output_dir = argv[3]

    # NOTE(review): every URL is written to the same path, so with several
    # URLs only the last download survives. The original comment "fix for
    # multiple outputs" suggests per-URL outputs were planned - confirm
    # the intended naming before implementing.
    opath = output
    for url in read_urls():
        fetch(url, opath)


if __name__ == "__main__":
    main(sys.argv)
]]></configfile>
  </configfiles>
</tool>