comparison dbkit_merge.py @ 3:03e124ff7e26 draft

"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author guerler
date Wed, 16 Dec 2020 13:11:35 +0000
parents 81c7d4668a7e
children
comparison
equal deleted inserted replaced
2:81c7d4668a7e 3:03e124ff7e26
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 from os import system
4 from os.path import getsize 3 from os.path import getsize
4 from shutil import copyfile
5 5
6 from dbkit_package.DBKit import DBKit 6 from dbkit_package.DBKit import DBKit, writeEntry
7 7
8 8
9 def main(args): 9 def main(args):
10 logFile = open(args.log, "w") 10 logFile = open(args.log, "w")
11 outIndex = args.outindex 11 outputIndex = args.outputindex
12 outData = args.outdata 12 outputDatabase = args.outputdatabase
13 if getsize(args.firstindex) > getsize(args.secondindex): 13 if getsize(args.firstindex) > getsize(args.secondindex):
14 firstIndex = args.firstindex 14 firstIndex = args.firstindex
15 firstData = args.firstdata 15 firstData = args.firstdata
16 secondIndex = args.secondindex 16 secondIndex = args.secondindex
17 secondData = args.seconddata 17 secondData = args.seconddata
18 else: 18 else:
19 firstIndex = args.secondindex 19 firstIndex = args.secondindex
20 firstData = args.seconddata 20 firstData = args.seconddata
21 secondIndex = args.firstindex 21 secondIndex = args.firstindex
22 secondData = args.firstdata 22 secondData = args.firstdata
23 system("cp %s %s" % (firstIndex, outIndex)) 23 copyfile(firstIndex, outputIndex)
24 system("cp %s %s" % (firstData, outData)) 24 copyfile(firstData, outputDatabase)
25 firstEntries = set() 25 firstEntries = set()
26 with open(firstIndex, "r") as f: 26 with open(firstIndex, "r") as f:
27 for line in f: 27 for line in f:
28 name = line.split()[0] 28 name = line.split()[0]
29 firstEntries.add(name) 29 firstEntries.add(name)
31 secondEntries = list() 31 secondEntries = list()
32 with open(secondIndex, "r") as f: 32 with open(secondIndex, "r") as f:
33 for line in f: 33 for line in f:
34 name = line.split()[0] 34 name = line.split()[0]
35 secondEntries.append(name) 35 secondEntries.append(name)
36 tempFile = "temp.dat" 36 fileName = "temp.dat"
37 count = 0 37 count = 0
38 dbkit = DBKit(secondIndex, secondData) 38 dbkit = DBKit(secondIndex, secondData)
39 for secondKey in secondEntries: 39 for secondKey in secondEntries:
40 if secondKey not in firstEntries: 40 if secondKey not in firstEntries:
41 dbkit.createFile(secondKey, tempFile) 41 dbkit.createFile(secondKey, fileName)
42 entrySize = getsize(tempFile) 42 writeEntry(secondKey, fileName, outputIndex, outputDatabase)
43 currentSize = getsize(outData)
44 system("cat %s >> %s" % (tempFile, outData))
45 system("echo '%s\t%s\t%s' >> %s" % (secondKey, currentSize, entrySize, outIndex))
46 count = count + 1 43 count = count + 1
47 else: 44 else:
48 logFile.write("Skipping existing entry %s.\n" % secondKey) 45 logFile.write("Skipping existing entry %s.\n" % secondKey)
49 logFile.write("Added %s entries.\n" % count) 46 logFile.write("Added %s entries.\n" % count)
50 logFile.close() 47 logFile.close()
54 parser = argparse.ArgumentParser(description='DBKit - Merge database pair.') 51 parser = argparse.ArgumentParser(description='DBKit - Merge database pair.')
55 parser.add_argument('-fi', '--firstindex', help='First Index file', required=True) 52 parser.add_argument('-fi', '--firstindex', help='First Index file', required=True)
56 parser.add_argument('-fd', '--firstdata', help='First Data file', required=True) 53 parser.add_argument('-fd', '--firstdata', help='First Data file', required=True)
57 parser.add_argument('-si', '--secondindex', help='Second Index file', required=True) 54 parser.add_argument('-si', '--secondindex', help='Second Index file', required=True)
58 parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True) 55 parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True)
59 parser.add_argument('-oi', '--outindex', help='Output Index file', required=True) 56 parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True)
60 parser.add_argument('-od', '--outdata', help='Output Data file', required=True) 57 parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True)
61 parser.add_argument('-log', '--log', help='Log file', required=True) 58 parser.add_argument('-log', '--log', help='Log file', required=True)
62 args = parser.parse_args() 59 args = parser.parse_args()
63 main(args) 60 main(args)