Mercurial > repos > guerler > dbkit
comparison dbkit_merge.py @ 2:81c7d4668a7e draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
| author | guerler | 
|---|---|
| date | Wed, 16 Dec 2020 12:02:48 +0000 | 
| parents | |
| children | 03e124ff7e26 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 1:987e55ea29b8 | 2:81c7d4668a7e | 
|---|---|
| 1 #! /usr/bin/env python3 | |
| 2 import argparse | |
| 3 from os import system | |
| 4 from os.path import getsize | |
| 5 | |
| 6 from dbkit_package.DBKit import DBKit | |
| 7 | |
| 8 | |
def _readEntryNames(indexPath):
    """Return the first whitespace-separated field of every non-blank index line."""
    names = []
    with open(indexPath, "r") as f:
        for line in f:
            fields = line.split()
            # A blank (or whitespace-only) line carries no entry name; skip it
            # instead of failing with IndexError.
            if fields:
                names.append(fields[0])
    return names


def main(args):
    """Merge two DBKit index/data pairs into a single output pair.

    The pair whose index file is larger on disk is copied unchanged to the
    output paths. Each entry of the other pair whose name does not already
    occur in the copied index is then appended to the output data file, and a
    ``name<TAB>offset<TAB>size`` line is appended to the output index, where
    ``offset`` is the size of the output data file before the append and
    ``size`` is the size of the extracted entry. Progress is written to the
    log file.

    Args:
        args: Object exposing the path attributes ``firstindex``,
            ``firstdata``, ``secondindex``, ``seconddata``, ``outindex``,
            ``outdata`` and ``log``.

    Raises:
        OSError: If any of the files cannot be read, copied or written.
    """
    # Imported locally so the function does not depend on module-level imports
    # that the rest of the file does not declare.
    import os
    import shutil
    import tempfile

    # The context manager guarantees the log is flushed and closed even when
    # the merge fails part-way through.
    with open(args.log, "w") as logFile:
        outIndex = args.outindex
        outData = args.outdata

        # Use the larger index as the base so fewer entries need to be
        # extracted and appended one by one.
        if getsize(args.firstindex) > getsize(args.secondindex):
            firstIndex = args.firstindex
            firstData = args.firstdata
            secondIndex = args.secondindex
            secondData = args.seconddata
        else:
            firstIndex = args.secondindex
            firstData = args.seconddata
            secondIndex = args.firstindex
            secondData = args.firstdata

        # Copy with the standard library rather than a shell "cp": paths are
        # never parsed by a shell and a failed copy raises instead of being
        # silently ignored.
        shutil.copyfile(firstIndex, outIndex)
        shutil.copyfile(firstData, outData)

        firstEntries = set(_readEntryNames(firstIndex))
        logFile.write("Detected %s entries.\n" % len(firstEntries))
        secondEntries = _readEntryNames(secondIndex)

        count = 0
        dbkit = DBKit(secondIndex, secondData)
        # Offset at which the next appended entry will start in the output
        # data file; tracked locally instead of re-reading the file size.
        currentSize = getsize(outData)

        # A private scratch directory avoids clashes with an existing
        # "temp.dat" in the working directory and is removed automatically.
        with tempfile.TemporaryDirectory() as tempDir:
            tempFile = os.path.join(tempDir, "temp.dat")
            with open(outData, "ab") as dataOut, open(outIndex, "a") as indexOut:
                for secondKey in secondEntries:
                    if secondKey in firstEntries:
                        logFile.write("Skipping existing entry %s.\n" % secondKey)
                        continue
                    # DBKit extracts the entry into the scratch file, which is
                    # then measured and appended to the output data file.
                    dbkit.createFile(secondKey, tempFile)
                    entrySize = getsize(tempFile)
                    with open(tempFile, "rb") as entryIn:
                        shutil.copyfileobj(entryIn, dataOut)
                    indexOut.write("%s\t%s\t%s\n" % (secondKey, currentSize, entrySize))
                    currentSize += entrySize
                    count = count + 1

        logFile.write("Added %s entries.\n" % count)
| 51 | |
| 52 | |
if __name__ == "__main__":
    # Command-line entry point: every option is mandatory. Each row is
    # (short flag, long flag, help text).
    optionTable = (
        ('-fi', '--firstindex', 'First Index file'),
        ('-fd', '--firstdata', 'First Data file'),
        ('-si', '--secondindex', 'Second Index file'),
        ('-sd', '--seconddata', 'Second Data file'),
        ('-oi', '--outindex', 'Output Index file'),
        ('-od', '--outdata', 'Output Data file'),
        ('-log', '--log', 'Log file'),
    )
    parser = argparse.ArgumentParser(description='DBKit - Merge database pair.')
    for shortFlag, longFlag, helpText in optionTable:
        parser.add_argument(shortFlag, longFlag, help=helpText, required=True)
    # Parse the command line and hand the resulting namespace to the merge.
    main(parser.parse_args())
