Mercurial > repos > iuc > data_manager_snpsift_dbnsfp
changeset 2:3d4cd0e3891f draft
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/data_managers/data_manager_snpsift_dbnsfp commit 6eea2aa4bb66b329385e1f7324dafae805b555c8
author | iuc |
---|---|
date | Fri, 10 Jun 2016 08:25:06 -0400 |
parents | d57ebdd39f0f |
children | 0e9624bcd707 |
files | data_manager/.data_manager_snpsift_dbnsfp.py.swp data_manager/data_manager_snpsift_dbnsfp.py |
diffstat | 2 files changed, 36 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/data_manager/data_manager_snpsift_dbnsfp.py Wed Jun 08 17:10:36 2016 -0400
+++ b/data_manager/data_manager_snpsift_dbnsfp.py Fri Jun 10 08:25:06 2016 -0400
@@ -103,11 +103,45 @@
         files = [f for f in allfiles if re.match(dbNSFP_file_pat, f)]
         files = sorted(files, key=natural_sortkey)
         for j, file in enumerate(files):
+            tempfiles = []
+            tempfiles.append(file + "_%d" % len(tempfiles))
+            tfh = open(tempfiles[-1], 'w')
+            lastpos = None
             fh = my_zip.open(file, 'rU')
             for i, line in enumerate(fh):
-                if j > 0 and i == 0:
+                if i == 0:
+                    if j == 0:
+                        wtr.write(line)
                     continue
-                wtr.write(line)
+                else:
+                    pos = int(line.split('\t')[1])
+                    if lastpos and pos < lastpos:
+                        tfh.close()
+                        tempfiles.append(file + "_%d" % len(tempfiles))
+                        tfh = open(tempfiles[-1], 'w')
+                        print >> sys.stderr, "%s [%d] pos: %d < %d" % (file, i, pos, lastpos)
+                    lastpos = pos
+                    tfh.write(line)
+            tfh.close()
+            if len(tempfiles) == 1:
+                with open(tempfiles[0], 'r') as tfh:
+                    wtr.writelines(tfh.readlines())
+            else:
+                tfha = [open(temp, 'r') for temp in tempfiles]
+                lines = [tfh.readline() for tfh in tfha]
+                curpos = [int(line.split('\t')[1]) for line in lines]
+                while len(tfha) > 0:
+                    k = curpos.index(min(curpos))
+                    wtr.write(lines[k])
+                    line = tfha[k].readline()
+                    if line:
+                        lines[k] = line
+                        curpos[k] = int(line.split('\t')[1])
+                    else:
+                        tfha[k].close()
+                        del tfha[k]
+                        del lines[k]
+                        del curpos[k]
     return dbnsfp_tsv