Mercurial > repos > guerler > dbkit
comparison dbkit_merge.py @ 3:03e124ff7e26 draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author | guerler |
---|---|
date | Wed, 16 Dec 2020 13:11:35 +0000 |
parents | 81c7d4668a7e |
children |
comparison
equal
deleted
inserted
replaced
2:81c7d4668a7e | 3:03e124ff7e26 |
---|---|
1 #! /usr/bin/env python3 | 1 #! /usr/bin/env python3 |
2 import argparse | 2 import argparse |
3 from os import system | |
4 from os.path import getsize | 3 from os.path import getsize |
 | 4 from shutil import copyfile |
5 | 5 |
6 from dbkit_package.DBKit import DBKit | 6 from dbkit_package.DBKit import DBKit, writeEntry |
7 | 7 |
8 | 8 |
9 def main(args): | 9 def main(args): |
10 logFile = open(args.log, "w") | 10 logFile = open(args.log, "w") |
11 outIndex = args.outindex | 11 outputIndex = args.outputindex |
12 outData = args.outdata | 12 outputDatabase = args.outputdatabase |
13 if getsize(args.firstindex) > getsize(args.secondindex): | 13 if getsize(args.firstindex) > getsize(args.secondindex): |
14 firstIndex = args.firstindex | 14 firstIndex = args.firstindex |
15 firstData = args.firstdata | 15 firstData = args.firstdata |
16 secondIndex = args.secondindex | 16 secondIndex = args.secondindex |
17 secondData = args.seconddata | 17 secondData = args.seconddata |
18 else: | 18 else: |
19 firstIndex = args.secondindex | 19 firstIndex = args.secondindex |
20 firstData = args.seconddata | 20 firstData = args.seconddata |
21 secondIndex = args.firstindex | 21 secondIndex = args.firstindex |
22 secondData = args.firstdata | 22 secondData = args.firstdata |
23 system("cp %s %s" % (firstIndex, outIndex)) | 23 copyfile(firstIndex, outputIndex) |
24 system("cp %s %s" % (firstData, outData)) | 24 copyfile(firstData, outputDatabase) |
25 firstEntries = set() | 25 firstEntries = set() |
26 with open(firstIndex, "r") as f: | 26 with open(firstIndex, "r") as f: |
27 for line in f: | 27 for line in f: |
28 name = line.split()[0] | 28 name = line.split()[0] |
29 firstEntries.add(name) | 29 firstEntries.add(name) |
31 secondEntries = list() | 31 secondEntries = list() |
32 with open(secondIndex, "r") as f: | 32 with open(secondIndex, "r") as f: |
33 for line in f: | 33 for line in f: |
34 name = line.split()[0] | 34 name = line.split()[0] |
35 secondEntries.append(name) | 35 secondEntries.append(name) |
36 tempFile = "temp.dat" | 36 fileName = "temp.dat" |
37 count = 0 | 37 count = 0 |
38 dbkit = DBKit(secondIndex, secondData) | 38 dbkit = DBKit(secondIndex, secondData) |
39 for secondKey in secondEntries: | 39 for secondKey in secondEntries: |
40 if secondKey not in firstEntries: | 40 if secondKey not in firstEntries: |
41 dbkit.createFile(secondKey, tempFile) | 41 dbkit.createFile(secondKey, fileName) |
42 entrySize = getsize(tempFile) | 42 writeEntry(secondKey, fileName, outputIndex, outputDatabase) |
43 currentSize = getsize(outData) | |
44 system("cat %s >> %s" % (tempFile, outData)) | |
45 system("echo '%s\t%s\t%s' >> %s" % (secondKey, currentSize, entrySize, outIndex)) | |
46 count = count + 1 | 43 count = count + 1 |
47 else: | 44 else: |
48 logFile.write("Skipping existing entry %s.\n" % secondKey) | 45 logFile.write("Skipping existing entry %s.\n" % secondKey) |
49 logFile.write("Added %s entries.\n" % count) | 46 logFile.write("Added %s entries.\n" % count) |
50 logFile.close() | 47 logFile.close() |
54 parser = argparse.ArgumentParser(description='DBKit - Merge database pair.') | 51 parser = argparse.ArgumentParser(description='DBKit - Merge database pair.') |
55 parser.add_argument('-fi', '--firstindex', help='First Index file', required=True) | 52 parser.add_argument('-fi', '--firstindex', help='First Index file', required=True) |
56 parser.add_argument('-fd', '--firstdata', help='First Data file', required=True) | 53 parser.add_argument('-fd', '--firstdata', help='First Data file', required=True) |
57 parser.add_argument('-si', '--secondindex', help='Second Index file', required=True) | 54 parser.add_argument('-si', '--secondindex', help='Second Index file', required=True) |
58 parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True) | 55 parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True) |
59 parser.add_argument('-oi', '--outindex', help='Output Index file', required=True) | 56 parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True) |
60 parser.add_argument('-od', '--outdata', help='Output Data file', required=True) | 57 parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True) |
61 parser.add_argument('-log', '--log', help='Log file', required=True) | 58 parser.add_argument('-log', '--log', help='Log file', required=True) |
62 args = parser.parse_args() | 59 args = parser.parse_args() |
63 main(args) | 60 main(args) |