comparison tools/data_source/microbial_import.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
#!/usr/bin/env python

"""
Script that imports locally stored data as a new dataset for the user.

Usage: microbial_import.py uid[,uid...] outputfile [...] config_file

The first argument is a comma-separated list of uids (literal 'None' entries
are ignored), the second is the path the first requested file is copied to,
and the LAST argument is the tab-separated configuration file describing the
available files.
"""
import sys
import os
from shutil import copyfile

assert sys.version_info[:2] >= ( 2, 4 )

BUFFER = 1048576


def _parse_uids(raw):
    """Split a comma-separated uid string, dropping literal 'None' entries."""
    return [uid for uid in raw.split(",") if uid != 'None']


def _parse_data_line(line):
    """Parse one configuration line.

    Returns ``(uid, record)`` for a well-formed ``DATA`` line, where record is
    the tuple ``(description, path, build, file_type, chr_acc)``; returns
    ``None`` for comments, non-DATA lines and DATA lines with too few fields.
    """
    if not line or line[0:1] == "#":
        return None
    fields = line.split('\t')
    # A DATA line needs: type, uid, org_num, chr_acc, feature, filetype, path
    if fields[0].upper() != "DATA" or len(fields) < 7:
        return None
    uid, org_num, chr_acc, _feature, filetype, path = fields[1:7]
    # path is usually the last column, so it carries the line terminator
    path = path.replace("\r", "").replace("\n", "")
    # description is the uid itself; build is the organism number
    return uid, (uid, path, org_num, filetype, chr_acc)


def _load_available_files(filename):
    """Read the configuration file into a dict keyed by uid.

    Raises EnvironmentError (IOError/OSError) if the file cannot be read.
    """
    available = {}
    handle = open(filename)
    try:
        for line in handle:
            parsed = _parse_data_line(line)
            if parsed is not None:
                uid, record = parsed
                available[uid] = record
    finally:
        # try/finally rather than "with" to stay valid on Python 2.4
        handle.close()
    return available


def main(argv=None):
    """Copy the first requested file to the output path and list the rest.

    argv defaults to sys.argv: argv[1] is the uid list, argv[2] the output
    file and argv[-1] the configuration file. Writes a '#File1' line for the
    copied file and a '#NewFile' line for every remaining requested file to
    stdout. Exits via sys.exit() when no requested file could be copied.
    """
    if argv is None:
        argv = sys.argv

    uids = _parse_uids(argv[1])
    out_file1 = argv[2]

    #create dictionary keyed by uid of tuples of (displayName,filePath,build,fileType,chrAcc) for all files
    try:
        available_files = _load_available_files(argv[-1])
    except EnvironmentError:
        sys.stderr.write("It appears that the configuration file for this tool is missing.\n")
        available_files = {}

    #create list of tuples for desired files, silently skipping unknown uids
    desired_files = [available_files[uid] for uid in uids if uid in available_files]

    #copy first copyable file to contents of given output file
    while True:
        if not desired_files:
            sys.stderr.write("There were no valid files requested.\n")
            # exit status intentionally left as before (plain sys.exit())
            sys.exit()
        file1_desc, file1_path, file1_build, file1_type, file1_chr_acc = desired_files.pop(0)
        try:
            copyfile(file1_path, out_file1)
        except EnvironmentError:
            # covers IOError/OSError and shutil.Error; try the next requested file
            sys.stderr.write("The file specified is missing.\n")
            continue
        sys.stdout.write("\t".join(["#File1", file1_desc, file1_chr_acc, file1_build, file1_type]) + "\n")
        break

    #Tell post-process filter where remaining files reside
    for file_desc, file_path, file_build, file_type, file_chr_acc in desired_files:
        sys.stdout.write("\t".join(["#NewFile", file_desc, file_chr_acc, file_build, file_path, file_type]) + "\n")


if __name__ == "__main__":
    main()