Mercurial > repos > guerler > dbkit
annotate dbkit_extract.py @ 2:81c7d4668a7e draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author | guerler |
---|---|
date | Wed, 16 Dec 2020 12:02:48 +0000 |
parents | |
children | 03e124ff7e26 |
rev | line source |
---|---|
2
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python3 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
2 import argparse |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
3 from os import system |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
4 from os.path import getsize |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
5 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
6 from dbkit_package.DBKit import DBKit |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
7 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
8 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
def main(args):
    """Extract the listed entries from a DBKit database into a new index/data pair.

    Entry names are taken from the first whitespace-separated token of every
    non-blank line of ``args.list``. Each name is looked up through
    ``DBKit(args.index, args.database)``; every entry that is found is
    appended to ``args.outdata`` and a line ``name<TAB>offset<TAB>size`` is
    appended to ``args.outindex``, where ``offset`` is the size of the output
    data file before the entry was added and ``size`` is the entry's size in
    bytes. The number of detected and extracted entries, plus every entry
    that could not be found, is written to ``args.log``.

    Args:
        args: Parsed command-line namespace providing the ``list``,
            ``index``, ``database``, ``outindex``, ``outdata`` and ``log``
            file paths.
    """
    # Local import: only this function needs it, and the file's import
    # block is left untouched.
    import shutil

    outIndex = args.outindex
    outData = args.outdata
    # Scratch file that DBKit.createFile() fills for one entry at a time.
    tempFile = "temp.dat"

    # The context manager guarantees the log is flushed and closed even if
    # extraction fails half-way through.
    with open(args.log, "w") as logFile:
        with open(args.list, "r") as f:
            # Skip blank lines; split()[0] would raise IndexError on them.
            entries = [line.split()[0] for line in f if line.strip()]
        logFile.write("Detected %s entries.\n" % len(entries))

        count = 0
        dbkit = DBKit(args.index, args.database)

        # Append mode creates missing output files and keeps existing content.
        # Writing in-process (instead of shelling out to cat/echo) is safe
        # for paths and entry names containing spaces or quotes, and I/O
        # errors surface as exceptions instead of being silently ignored.
        with open(outData, "ab") as dataOut, open(outIndex, "a") as indexOut:
            # Offset at which the next entry will start in the data file.
            currentSize = getsize(outData)
            for entry in sorted(entries):
                success = dbkit.createFile(entry, tempFile)
                if success:
                    entrySize = getsize(tempFile)
                    with open(tempFile, "rb") as entryData:
                        shutil.copyfileobj(entryData, dataOut)
                    indexOut.write("%s\t%s\t%s\n" % (entry, currentSize, entrySize))
                    currentSize += entrySize
                    count = count + 1
                else:
                    logFile.write("Entry %s not found.\n" % entry)
        logFile.write("Extracted %s entries.\n" % count)
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
34 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
35 |
81c7d4668a7e
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: every option is a required file path that is
    # handed to main() unchanged.
    # The description names what this script actually does (extraction); the
    # previous text described merging a database pair.
    parser = argparse.ArgumentParser(description='DBKit - Extract database entries.')
    parser.add_argument('-l', '--list', help='List of entries to be extracted', required=True)
    parser.add_argument('-i', '--index', help='Database Index file (ffindex)', required=True)
    parser.add_argument('-d', '--database', help='Database Data file (ffdata)', required=True)
    parser.add_argument('-oi', '--outindex', help='Output Index file', required=True)
    parser.add_argument('-od', '--outdata', help='Output Data file', required=True)
    parser.add_argument('-g', '--log', help='Log file', required=True)
    args = parser.parse_args()
    main(args)