Mercurial > repos > guerler > dbkit
annotate dbkit_create.py @ 3:03e124ff7e26 draft
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author | guerler |
---|---|
date | Wed, 16 Dec 2020 13:11:35 +0000 |
parents | 81c7d4668a7e |
children |
rev | line source |
---|---|
0
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
1 #! /usr/bin/env python3 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
2 import argparse |
3
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
3 from os import remove |
1
987e55ea29b8
"planemo upload commit ce9026535c3c6da5e97366a4f0b347b0ec572dbc-dirty"
guerler
parents:
0
diff
changeset
|
4 from os.path import isfile, getsize |
3
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
5 import wget |
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
6 |
03e124ff7e26
"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
guerler
parents:
2
diff
changeset
|
7 from dbkit_package.DBKit import writeEntry |
0
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
8 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
9 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
def getIdentifiers(args):
    """Read the entry list and return the formatted, unique identifiers.

    The first whitespace-separated token of every non-blank line of the
    file ``args.list`` is taken as the raw identifier and formatted in
    this order:

    1. truncated to ``args.idlength`` characters if that value is positive,
    2. converted to lower/upper case if ``args.idcase`` is ``"lower"`` or
       ``"upper"`` (any other value leaves the case untouched),
    3. suffixed with ``.<args.idextension>`` if an extension is given.

    Args:
        args: Namespace providing ``list``, ``idlength``, ``idcase`` and
            ``idextension``.

    Returns:
        A sorted list of the distinct formatted identifiers.

    Raises:
        ValueError: If ``args.idlength`` cannot be converted to an integer.
        OSError: If the list file cannot be opened.
    """
    # The length limit does not depend on the line, so convert it once.
    idLength = int(args.idlength)
    entries = set()
    with open(args.list) as file:
        for line in file:
            tokens = line.split()
            # Blank or whitespace-only lines carry no identifier; skip them
            # instead of failing with an IndexError.
            if not tokens:
                continue
            entry = tokens[0]
            if idLength > 0:
                entry = entry[:idLength]
            if args.idcase == "lower":
                entry = entry.lower()
            elif args.idcase == "upper":
                entry = entry.upper()
            if args.idextension:
                entry = "%s.%s" % (entry, args.idextension)
            entries.add(entry)
    return sorted(entries)
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
26 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
27 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
def main(args):
    """Collect every listed entry and merge it into the output database.

    The identifiers come from ``getIdentifiers(args)``. Each entry is
    either downloaded from ``args.url`` + identifier (when a url is given)
    or looked up as ``args.path``/identifier. Entries that exist and are
    non-empty are handed to ``writeEntry`` together with the index and
    database paths; everything else is only reported in the log file.

    Args:
        args: Namespace providing ``list``, ``idlength``, ``idcase``,
            ``idextension``, ``url``, ``path``, ``index``, ``database``
            and ``log``.

    Raises:
        ValueError: If neither ``args.url`` nor ``args.path`` is set.
    """
    # Fail before touching any output file; previously this surfaced as an
    # AttributeError on ``None.rstrip`` after the database had been removed.
    if not args.url and not args.path:
        raise ValueError("Either a source url or a path to the files is required.")
    entries = getIdentifiers(args)
    outputIndex = args.index
    outputDatabase = args.database
    # The url takes precedence; the local path is only normalized when used.
    pathName = None if args.url else args.path.rstrip("/")
    # The context manager closes the log even if a download or write fails.
    with open(args.log, "w") as logFile:
        logFile.write("Found %s entries.\n" % len(entries))
        # Start from a fresh database file.
        # NOTE(review): only the database is removed, the index is left
        # as-is -- confirm writeEntry copes with a pre-existing index.
        if isfile(outputDatabase):
            remove(outputDatabase)
        for entryId in entries:
            logFile.write("Loading %s.\n" % entryId)
            if args.url:
                # wget.download returns the name of the local file it wrote.
                # NOTE(review): downloaded files are not deleted afterwards
                # -- confirm whether that is intended.
                fileName = wget.download("%s%s" % (args.url, entryId))
            else:
                fileName = "%s/%s" % (pathName, entryId)
            if isfile(fileName):
                entrySize = getsize(fileName)
                if entrySize == 0:
                    logFile.write("Entry `%s` not found.\n" % entryId)
                else:
                    writeEntry(entryId, fileName, outputIndex, outputDatabase)
            else:
                logFile.write("Content not found: %s.\n" % fileName)
            # Flush per entry so progress is visible while the job runs.
            logFile.flush()
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
53 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
54 |
1914107cc967
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command line entry point: parse the options and run the merge.
    parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
    parser.add_argument('-l', '--list', help='List of entries', required=True)
    parser.add_argument('-u', '--url', help='Source Url', required=False)
    parser.add_argument('-p', '--path', help='Path to files', required=False)
    # type=int rejects a non-numeric length at parse time with a usage error.
    parser.add_argument('-il', '--idlength', help='Format Identifier Length (integer)', required=False, type=int, default=0)
    parser.add_argument('-ic', '--idcase', help='Format Identifier Case (lower, upper)', required=False, default=None)
    parser.add_argument('-ie', '--idextension', help='Format Identifier Extension', required=False, default=None)
    parser.add_argument('-o', '--index', help='Output Database Index', required=True)
    parser.add_argument('-d', '--database', help='Output Database', required=True)
    parser.add_argument('-g', '--log', help="Log file", required=True)
    args = parser.parse_args()
    # One of the two sources is needed to locate the entries; report a
    # missing source as a usage error instead of failing inside main().
    if not args.url and not args.path:
        parser.error("one of the arguments -u/--url or -p/--path is required")
    main(args)