changeset 2:3d4cd0e3891f draft

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
author iuc
date Fri, 10 Jun 2016 08:25:06 -0400
parents d57ebdd39f0f
children 0e9624bcd707
files data_manager/.data_manager_snpsift_dbnsfp.py.swp data_manager/data_manager_snpsift_dbnsfp.py
diffstat 2 files changed, 36 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
Binary file data_manager/.data_manager_snpsift_dbnsfp.py.swp has changed
--- a/data_manager/data_manager_snpsift_dbnsfp.py	Wed Jun 08 17:10:36 2016 -0400
+++ b/data_manager/data_manager_snpsift_dbnsfp.py	Fri Jun 10 08:25:06 2016 -0400
@@ -103,11 +103,45 @@
         files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)]
         files = sorted(files, key=natural_sortkey)
         for j, file in enumerate(files):
+            tempfiles = []
+            tempfiles.append(file + "_%d" % len(tempfiles))
+            tfh = open(tempfiles[-1], 'w')
+            lastpos = None
             fh = my_zip.open(file, 'rU')
             for i, line in enumerate(fh):
-                if j > 0 and i == 0:
+                if i == 0:
+                    if j == 0:
+                        wtr.write(line)
                     continue
-                wtr.write(line)
+                else:
+                    pos = int(line.split('\t')[1])
+                    if lastpos and pos < lastpos:
+                        tfh.close()
+                        tempfiles.append(file + "_%d" % len(tempfiles))
+                        tfh = open(tempfiles[-1], 'w')
+                        print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos)
+                    lastpos = pos
+                tfh.write(line)
+            tfh.close()
+            if len(tempfiles) == 1:
+                with open(tempfiles[0], 'r') as tfh:
+                    wtr.writelines(tfh.readlines())
+            else:
+                tfha = [open(temp, 'r') for temp in tempfiles]
+                lines = [tfh.readline() for tfh in tfha]
+                curpos = [int(line.split('\t')[1]) for line in lines]
+                while len(tfha) > 0:
+                    k = curpos.index(min(curpos))
+                    wtr.write(lines[k])
+                    line = tfha[k].readline()
+                    if line:
+                        lines[k] = line
+                        curpos[k] = int(line.split('\t')[1])
+                    else:
+                        tfha[k].close()
+                        del tfha[k]
+                        del lines[k]
+                        del curpos[k]
     return dbnsfp_tsv