annotate dbkit_create.py @ 2:81c7d4668a7e draft

"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author guerler
date Wed, 16 Dec 2020 12:02:48 +0000
parents 987e55ea29b8
children 03e124ff7e26
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
1 #! /usr/bin/env python3
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
2 import argparse
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
3 from os import system
1
987e55ea29b8 "planemo upload commit ce9026535c3c6da5e97366a4f0b347b0ec572dbc-dirty"
guerler
parents: 0
diff changeset
4 from os.path import isfile, getsize
0
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
5
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
6
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
def getIdentifiers(args):
    """Read the identifier list and return the formatted, unique identifiers.

    The first whitespace-separated token of every non-blank line in the
    file ``args.list`` is taken as an identifier and formatted as follows:

    * truncated to ``args.idlength`` characters when that value is > 0,
    * lower-cased / upper-cased when ``args.idcase`` is ``"lower"`` /
      ``"upper"`` (any other value leaves the case untouched),
    * suffixed with ``".<args.idextension>"`` when an extension is set.

    Args:
        args: Namespace providing the ``list``, ``idlength``, ``idcase``
            and ``idextension`` attributes.

    Returns:
        A sorted list of the distinct formatted identifiers.

    Raises:
        ValueError: If ``args.idlength`` cannot be converted to an integer.
        OSError: If the list file cannot be opened.
    """
    # The length limit does not depend on the line, so convert it once.
    idLength = int(args.idlength)
    entries = set()
    with open(args.list) as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # Blank or whitespace-only lines carry no identifier;
                # indexing the empty token list would raise IndexError.
                continue
            entry = tokens[0]
            if idLength > 0:
                entry = entry[:idLength]
            if args.idcase == "lower":
                entry = entry.lower()
            elif args.idcase == "upper":
                entry = entry.upper()
            if args.idextension:
                entry = "%s.%s" % (entry, args.idextension)
            entries.add(entry)
    return sorted(entries)
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
23
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
24
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
def main(args):
    """Merge every listed entry into one database file and write its index.

    For each identifier returned by ``getIdentifiers(args)`` the entry is
    either downloaded with ``wget`` from ``args.url + identifier`` into
    ``temp.dat`` (when ``args.url`` is set) or read from
    ``<args.path>/<identifier>``. Non-empty entries are appended to
    ``args.database`` and a line ``identifier<TAB>offset<TAB>size`` is
    written to ``args.index``. Progress and missing entries are reported
    in ``args.log``.

    Args:
        args: Namespace providing ``list``, ``idlength``, ``idcase``,
            ``idextension``, ``url``, ``path``, ``index``, ``database``
            and ``log``. ``path`` must be set when ``url`` is empty.
    """
    # Function-scope imports keep this block self-contained; the module
    # header only imports `system`, `isfile` and `getsize`.
    import os
    import shutil
    import subprocess

    entries = getIdentifiers(args)
    outputIndex = args.index
    outputDatabase = args.database
    # Context managers guarantee both files are closed, even on errors.
    with open(args.log, "w") as logFile:
        logFile.write("Found %s entries.\n" % len(entries))
        # Start from a clean database; like `rm -f`, a missing file is fine.
        try:
            os.remove(outputDatabase)
        except FileNotFoundError:
            pass
        with open(outputIndex, "w") as indexFile:
            start = 0
            for entryId in entries:
                logFile.write("Loading %s.\n" % entryId)
                if args.url:
                    fileName = "temp.dat"
                    try:
                        # Argument list instead of a shell string: URL
                        # characters such as `&`, `?` or `;` are passed to
                        # wget verbatim. The exit status is deliberately
                        # ignored; a failed download leaves an empty file
                        # which is reported below.
                        subprocess.run(["wget", "-q", "-O", fileName,
                                        "%s%s" % (args.url, entryId)])
                    except OSError as error:
                        # wget could not be started at all; never fall
                        # through to a stale temp.dat of unknown origin.
                        logFile.write("Unable to run wget: %s.\n" % error)
                        logFile.flush()
                        continue
                else:
                    pathName = args.path.rstrip("/")
                    fileName = "%s/%s" % (pathName, entryId)
                if isfile(fileName):
                    size = getsize(fileName)
                    if size == 0:
                        logFile.write("Entry `%s` not found.\n" % entryId)
                    else:
                        # Append the raw bytes first so the index never
                        # references data that failed to copy.
                        with open(fileName, "rb") as source, \
                                open(outputDatabase, "ab") as target:
                            shutil.copyfileobj(source, target)
                        indexFile.write("%s\t%d\t%d\n" % (entryId, start, size))
                        start = start + size
                else:
                    logFile.write("Content not found: %s.\n" % fileName)
                # Flush per entry so progress is visible while running.
                logFile.flush()
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
54
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
55
1914107cc967 "planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
guerler
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the options and build the database.
    parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
    parser.add_argument('-l', '--list', help='List of entries', required=True)
    parser.add_argument('-u', '--url', help='Source Url', required=False)
    parser.add_argument('-p', '--path', help='Path to files', required=False)
    # type=int lets argparse reject a non-numeric length with a usage error
    # instead of a traceback in the middle of processing.
    parser.add_argument('-il', '--idlength', help='Format Identifier Length (integer)', required=False, type=int, default=0)
    parser.add_argument('-ic', '--idcase', help='Format Identifier Case (lower, upper)', required=False, default=None)
    parser.add_argument('-ie', '--idextension', help='Format Identifier Extension', required=False, default=None)
    parser.add_argument('-o', '--index', help='Output Database Index', required=True)
    parser.add_argument('-d', '--database', help='Output Database', required=True)
    parser.add_argument('-g', '--log', help="Log file", required=True)
    args = parser.parse_args()
    # Entries are read from the url when given, otherwise from the path;
    # fail early if neither source is available, before any output file
    # is removed or truncated.
    if not args.url and not args.path:
        parser.error("one of the arguments -u/--url or -p/--path is required")
    main(args)