diff tools/data_source/microbial_import.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tools/data_source/microbial_import.py	Fri Mar 09 19:37:19 2012 -0500
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+
+"""
+Script that imports locally stored data as a new dataset for the user
+Usage: microbial_import.py id[,id,...] outputfile [...] configfile (always the last argument)
+"""
+import sys, os
+from shutil import copyfile
+
+assert sys.version_info[:2] >= ( 2, 4 )
+
+BUFFER = 1048576
+
+uids = sys.argv[1].split(",")
+out_file1 = sys.argv[2]
+
+#remove NONE from uids
+have_none = True
+while have_none:
+    try:
+        uids.remove('None')
+    except:
+        have_none = False
+
+
+#create dictionary keyed by uid of tuples of (displayName,filePath,build) for all files
+available_files = {}
+try:
+    filename = sys.argv[-1]
+    for i, line in enumerate( file( filename ) ):
+        if not line or line[0:1] == "#" : continue
+        fields = line.split('\t')
+        try:
+            info_type = fields.pop(0)
+            
+            if info_type.upper()=="DATA":
+                uid = fields.pop(0)
+                org_num = fields.pop(0)
+                chr_acc = fields.pop(0)
+                feature = fields.pop(0)
+                filetype = fields.pop(0)
+                path = fields.pop(0).replace("\r","").replace("\n","")
+                
+                file_type = filetype
+                build = org_num
+                description = uid
+            else:
+                continue
+        except:
+            continue
+
+        available_files[uid]=(description,path,build,file_type,chr_acc)
+except:
+    print >>sys.stderr, "It appears that the configuration file for this tool is missing."
+
+#create list of tuples of (displayName,FileName,build) for desired files
+desired_files = []
+for uid in uids:
+    try:
+        desired_files.append(available_files[uid])
+    except:
+        continue
+
+#copy first file to contents of given output file
+file1_copied = False
+while not file1_copied:
+    try:
+        first_file = desired_files.pop(0)
+    except:
+        print >>sys.stderr, "There were no valid files requested."
+        sys.exit()
+    file1_desc, file1_path, file1_build, file1_type,file1_chr_acc = first_file
+    try:
+        copyfile(file1_path,out_file1)
+        print "#File1\t"+file1_desc+"\t"+file1_chr_acc+"\t"+file1_build+"\t"+file1_type
+        file1_copied = True
+    except:
+        print >>sys.stderr, "The file specified is missing."
+        continue
+        #print >>sys.stderr, "The file specified is missing."
+    
+
+#Tell post-process filter where remaining files reside
+for extra_output in desired_files:
+    file_desc, file_path, file_build, file_type,file_chr_acc = extra_output
+    print "#NewFile\t"+file_desc+"\t"+file_chr_acc+"\t"+file_build+"\t"+file_path+"\t"+file_type