Mercurial > repos > guerler > dbkit
annotate dbkit_merge.py @ 3:03e124ff7e26 draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author | guerler |
---|---|
date | Wed, 16 Dec 2020 13:11:35 +0000 |
parents | 81c7d4668a7e |
children |
rev | line source |
---|---|
2
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python3 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
2 import argparse |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
3 from os.path import getsize |
3
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
4 from shutil import copyfile |
2
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
5 |
3
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
6 from dbkit_package.DBKit import DBKit, writeEntry |
2
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
7 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
8 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
def _read_entry_names(index_path):
    """Return the first whitespace-separated token of each non-blank line."""
    names = []
    with open(index_path, "r") as handle:
        for line in handle:
            fields = line.split()
            # A blank (or whitespace-only) line carries no entry name; the
            # unguarded ``line.split()[0]`` would raise IndexError on it.
            if fields:
                names.append(fields[0])
    return names


def main(args):
    """Merge two index/data file pairs into a single output pair.

    The pair whose index file is strictly larger on disk is copied verbatim
    to the output paths (on a tie the second pair is used as the base).
    Every name listed in the other index that is not already present is then
    materialised with ``DBKit.createFile`` and appended with ``writeEntry``.
    Progress messages are written to the log file.

    Args:
        args: Parsed command-line namespace providing the ``log``,
            ``firstindex``, ``firstdata``, ``secondindex``, ``seconddata``,
            ``outputindex`` and ``outputdatabase`` file paths.
    """
    # Function-scope imports so this block does not depend on edits to the
    # module's import section.
    import os
    import tempfile

    # The context manager guarantees the log is flushed and closed even when
    # a later step raises.
    with open(args.log, "w") as log_file:
        output_index = args.outputindex
        output_database = args.outputdatabase

        # Use the pair with the larger index as the base so fewer entries
        # have to be transferred one by one.
        if getsize(args.firstindex) > getsize(args.secondindex):
            base_index, base_data = args.firstindex, args.firstdata
            extra_index, extra_data = args.secondindex, args.seconddata
        else:
            base_index, base_data = args.secondindex, args.seconddata
            extra_index, extra_data = args.firstindex, args.firstdata

        copyfile(base_index, output_index)
        copyfile(base_data, output_database)

        known_names = set(_read_entry_names(base_index))
        log_file.write("Detected %s entries.\n" % len(known_names))

        candidate_names = _read_entry_names(extra_index)

        added = 0
        dbkit = DBKit(extra_index, extra_data)
        # A private scratch directory avoids clashing with another run that
        # shares the working directory, and is removed when the block exits.
        with tempfile.TemporaryDirectory() as scratch_dir:
            scratch_file = os.path.join(scratch_dir, "temp.dat")
            for name in candidate_names:
                if name in known_names:
                    log_file.write("Skipping existing entry %s.\n" % name)
                    continue
                # NOTE(review): createFile presumably writes the entry's data
                # to scratch_file, which writeEntry then appends to the
                # output pair -- confirm against dbkit_package.DBKit.
                dbkit.createFile(name, scratch_file)
                writeEntry(name, scratch_file, output_index, output_database)
                # Remember the name so a repeat within the same index is
                # skipped instead of being written to the output twice.
                known_names.add(name)
                added += 1
        log_file.write("Added %s entries.\n" % added)
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
48 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
49 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: declare the required file-path options from a
    # table, parse the command line, and hand the namespace to main().
    cli = argparse.ArgumentParser(description='DBKit - Merge database pair.')
    option_table = (
        ('-fi', '--firstindex', 'First Index file'),
        ('-fd', '--firstdata', 'First Data file'),
        ('-si', '--secondindex', 'Second Index file'),
        ('-sd', '--seconddata', 'Second Data file'),
        ('-oi', '--outputindex', 'Output Index file'),
        ('-od', '--outputdatabase', 'Output Data file'),
        ('-log', '--log', 'Log file'),
    )
    # Every option is mandatory; registration order matches the help output.
    for short_flag, long_flag, description in option_table:
        cli.add_argument(short_flag, long_flag, help=description, required=True)
    main(cli.parse_args())