Mercurial > repos > guerler > dbkit
diff dbkit_merge.py @ 2:81c7d4668a7e draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author | guerler |
---|---|
date | Wed, 16 Dec 2020 12:02:48 +0000 |
parents | |
children | 03e124ff7e26 |
line wrap: on
line diff
#! /usr/bin/env python3
"""DBKit - merge a pair of index/data databases into one output pair."""
import argparse
import shutil
from os import system  # noqa: F401 - pre-existing file-level import, kept on purpose
from os.path import getsize

from dbkit_package.DBKit import DBKit


def _readEntryNames(indexPath):
    """Return the first whitespace-separated field of each non-blank index line.

    Names are returned in file order; blank lines are skipped so that a
    trailing empty line does not raise an IndexError.
    """
    names = []
    with open(indexPath, "r") as f:
        for line in f:
            fields = line.split()
            if fields:
                names.append(fields[0])
    return names


def _appendFile(sourcePath, targetPath):
    """Append the raw bytes of sourcePath to the end of targetPath."""
    with open(sourcePath, "rb") as source, open(targetPath, "ab") as target:
        shutil.copyfileobj(source, target)


def main(args):
    """Merge two index/data pairs into args.outindex / args.outdata.

    The pair whose index file is larger (in bytes) is copied verbatim to the
    output. Every entry of the other pair whose name is not already present
    is extracted through DBKit.createFile into a temporary file, appended to
    the output data file, and recorded in the output index as a
    tab-separated line: name, byte offset in the output data file, and size
    of the entry in bytes. Progress is written to args.log.

    Args:
        args: Parsed command line namespace providing firstindex, firstdata,
            secondindex, seconddata, outindex, outdata and log paths.

    Raises:
        OSError: If one of the files cannot be read, copied or written.
            (The previous shell-based implementation ignored such failures.)
    """
    with open(args.log, "w") as logFile:
        outIndex = args.outindex
        outData = args.outdata
        # Use the larger index as the base so fewer entries need to be
        # extracted and appended one by one.
        if getsize(args.firstindex) > getsize(args.secondindex):
            firstIndex = args.firstindex
            firstData = args.firstdata
            secondIndex = args.secondindex
            secondData = args.seconddata
        else:
            firstIndex = args.secondindex
            firstData = args.seconddata
            secondIndex = args.firstindex
            secondData = args.firstdata
        # Copy with shutil instead of a shell `cp` so that paths containing
        # spaces or shell metacharacters are handled and errors are raised.
        shutil.copyfile(firstIndex, outIndex)
        shutil.copyfile(firstData, outData)
        firstEntries = set(_readEntryNames(firstIndex))
        logFile.write("Detected %s entries.\n" % len(firstEntries))
        secondEntries = _readEntryNames(secondIndex)
        # NOTE(review): the scratch file lives in the current working
        # directory and is not removed afterwards, as in the original.
        tempFile = "temp.dat"
        count = 0
        dbkit = DBKit(secondIndex, secondData)
        with open(outIndex, "a") as indexFile:
            for secondKey in secondEntries:
                if secondKey not in firstEntries:
                    dbkit.createFile(secondKey, tempFile)
                    entrySize = getsize(tempFile)
                    # The offset of the new entry is the size of the output
                    # data file before the entry is appended.
                    currentSize = getsize(outData)
                    _appendFile(tempFile, outData)
                    indexFile.write("%s\t%s\t%s\n" % (secondKey, currentSize, entrySize))
                    count = count + 1
                else:
                    logFile.write("Skipping existing entry %s.\n" % secondKey)
        logFile.write("Added %s entries.\n" % count)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='DBKit - Merge database pair.')
    parser.add_argument('-fi', '--firstindex', help='First Index file', required=True)
    parser.add_argument('-fd', '--firstdata', help='First Data file', required=True)
    parser.add_argument('-si', '--secondindex', help='Second Index file', required=True)
    parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True)
    parser.add_argument('-oi', '--outindex', help='Output Index file', required=True)
    parser.add_argument('-od', '--outdata', help='Output Data file', required=True)
    parser.add_argument('-log', '--log', help='Log file', required=True)
    args = parser.parse_args()
    main(args)