annotate tools/data_source/import.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
#!/usr/bin/env python

"""
Script that imports locally stored data as a new dataset for the user
Usage: import id outputfile
"""
import os
import shutil
import sys

assert sys.version_info[:2] >= (2, 4)

# Chunk size, in bytes, used while copying the data file.
BUFFER = 1048576

# this path is hardcoded (and relative, so it is resolved against the
# current working directory of the process)
IMPORT_DIR = os.path.join('database', 'import')

# Maps the dataset id given on the command line to the file name stored
# under IMPORT_DIR.
id2name = {
    'eryth': 'ErythPreCRMmm3_cusTrk.txt',
    'cishg16': 'ReglRegHBBhg16CusTrk.txt',
    'cishg17': 'ReglRegHBBhg17CusTrk.txt',
    'exons': 'ExonsKnownGenes_mm3.txt',
    'krhg16': 'known_regulatory_hg16.bed',
    'krhg17': 'known_regulatory_hg17.bed',
    'tARhg16mmc': 'hg16.mouse.t_AR.cold.bed',
    'tARhg16mmm': 'hg16.mouse.t_AR.medium.bed',
    'tARhg16mmh': 'hg16.mouse.t_AR.hot.bed',
    'tARhg16rnc': 'hg16.rat.t_AR.cold.bed',
    'tARhg16rnm': 'hg16.rat.t_AR.medium.bed',
    'tARhg16rnh': 'hg16.rat.t_AR.hot.bed',
    'phastConsHg16': 'phastConsMost_hg16.bed',
    'omimhg16': 'omimDisorders_hg16.tab',
    'omimhg17': 'omimDisorders_hg17.tab',
}


def main(argv=None):
    """Copy the stored file registered under a dataset id to an output path.

    Parameters:
        argv: argument list laid out like ``sys.argv`` (program name, dataset
            id, output file name). Defaults to ``sys.argv`` when omitted.

    Returns:
        ``1`` when the dataset id or output file name is missing from
        ``argv``; ``None`` otherwise. An unknown dataset id or an unreadable
        source file is reported on stdout and still returns ``None`` (exit
        status 0), matching how this script has always reported those cases.

    Raises:
        IOError: if the output file cannot be opened or written.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) < 3:
        sys.stderr.write('Usage: import id outputfile\n')
        return 1

    dataid = argv[1]
    out_name = argv[2]

    fname = id2name.get(dataid, '')
    if not fname:
        print('Importing invalid data %s' % dataid)
        return None
    # Reported before the copy is attempted, as the script always has.
    print('Imported %s' % fname)

    inp_name = os.path.join(IMPORT_DIR, fname)

    # Binary mode keeps the copy byte-for-byte regardless of platform newline
    # handling or text encoding.
    try:
        inp = open(inp_name, 'rb')
    except IOError:
        print('Could not find file %s' % inp_name)
        return None

    # Nested try/finally (rather than ``with``) keeps the script usable on the
    # old interpreters permitted by the version assertion above, while still
    # closing both handles if opening or writing the output fails.
    try:
        out = open(out_name, 'wb')
        try:
            shutil.copyfileobj(inp, out, BUFFER)
        finally:
            out.close()
    finally:
        inp.close()

    return None


if __name__ == '__main__':
    sys.exit(main(sys.argv))