Mercurial > repos > guerler > dbkit
comparison dbkit_create.py @ 0:1914107cc967 draft
"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
author | guerler |
---|---|
date | Wed, 25 Nov 2020 17:22:48 +0000 |
parents | |
children | 987e55ea29b8 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:1914107cc967 |
---|---|
1 #! /usr/bin/env python3 | |
2 import argparse | |
3 from os import system | |
4 from os.path import getsize | |
5 | |
6 | |
def getIdentifiers(args):
    """Read the entry list and return the formatted, de-duplicated identifiers.

    The first whitespace-separated token of every non-blank line in
    ``args.list`` is taken as the raw identifier and then formatted:

    * truncated to ``args.idlength`` characters when that value is > 0,
    * lower- or upper-cased when ``args.idcase`` is ``"lower"`` or
      ``"upper"`` (any other value leaves the case untouched),
    * suffixed with ``.<args.idextension>`` when an extension is given.

    Args:
        args: Namespace providing ``list`` (path of the entry list file),
            ``idlength`` (int or numeric string), ``idcase`` and
            ``idextension``.

    Returns:
        Sorted list of the unique formatted identifiers.

    Raises:
        ValueError: If ``args.idlength`` cannot be converted to an integer.
        OSError: If the list file cannot be opened.
    """
    # Loop-invariant, so convert it once instead of once per line.
    idLength = int(args.idlength)
    entries = set()
    with open(args.list) as file:
        for line in file:
            columns = line.split()
            if not columns:
                # Blank or whitespace-only line: there is no first token, and
                # indexing [0] here would raise IndexError.
                continue
            entry = columns[0]
            if idLength > 0:
                entry = entry[:idLength]
            if args.idcase == "lower":
                entry = entry.lower()
            elif args.idcase == "upper":
                entry = entry.upper()
            if args.idextension is not None:
                entry = "%s.%s" % (entry, args.idextension)
            entries.add(entry)
    return sorted(entries)
23 | |
24 | |
def main(args):
    """Download every listed entry and merge the files into one database.

    Each identifier returned by ``getIdentifiers(args)`` is fetched with
    ``wget`` from ``args.url + identifier`` into a scratch file below
    ``args.temp``. Non-empty downloads are appended to ``args.database`` and
    recorded in ``args.index`` as a tab-separated line
    ``identifier<TAB>start<TAB>size``, where ``start`` is the byte offset of
    the entry inside the database and ``size`` its length in bytes. Progress
    and missing entries are written to ``args.log``.

    Args:
        args: Namespace providing ``list``, ``url``, ``temp``, ``index``,
            ``database`` and ``log`` plus the identifier formatting options
            consumed by ``getIdentifiers``.

    Raises:
        OSError: If an output file cannot be written, or if ``wget`` cannot
            be started (``FileNotFoundError``).
    """
    # Function-scope imports: the module header only provides `system` and
    # `getsize`, and this block must stay self-contained.
    import os
    import shutil
    import subprocess

    entries = getIdentifiers(args)
    outputIndex = args.index
    outputDatabase = args.database
    tempPath = args.temp.rstrip("/")
    tempFile = "%s/temp.pdb" % tempPath

    # `with` guarantees both the log and the index are flushed and closed,
    # even when a download step raises.
    with open(args.log, "w") as logFile:
        logFile.write("Found %s entries.\n" % len(entries))
        if tempPath:
            os.makedirs(tempPath, exist_ok=True)
        # Start from a clean database; entries are appended one by one below.
        try:
            os.remove(outputDatabase)
        except FileNotFoundError:
            pass
        with open(outputIndex, "w") as indexFile:
            start = 0
            for entryId in entries:
                logFile.write("Loading %s.\n" % entryId)
                # Argument list instead of a shell string: URLs or paths that
                # contain `&`, `?`, `;` or spaces are passed through verbatim
                # and cannot be interpreted as shell syntax.
                subprocess.run(["wget", "-q", "-O", tempFile, "%s%s" % (args.url, entryId)])
                tempSize = getsize(tempFile)
                if tempSize == 0:
                    logFile.write("Entry `%s` not found.\n" % entryId)
                else:
                    indexFile.write("%s\t%d\t%d\n" % (entryId, start, tempSize))
                    # Entries are concatenated without any separator, so the
                    # next one begins exactly `tempSize` bytes further on.
                    start += tempSize
                    with open(tempFile, "rb") as source, open(outputDatabase, "ab") as target:
                        shutil.copyfileobj(source, target)
                logFile.flush()
49 | |
50 | |
if __name__ == "__main__":
    # Script entry point: declare the command-line options, parse them and
    # hand the resulting namespace to main().
    cli = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
    # (short flag, long flag, keyword options), in the order --help shows them.
    optionTable = (
        ('-l', '--list', dict(help='List of entries', required=True)),
        ('-u', '--url', dict(help='Source Url', required=True)),
        ('-t', '--temp', dict(help='temp', required=True)),
        ('-il', '--idlength', dict(help='Format Identifier Length (integer)', required=False, default="0")),
        ('-ic', '--idcase', dict(help='Format Identifier Case (lower, upper)', required=False, default=None)),
        ('-ie', '--idextension', dict(help='Format Identifier Extension', required=False, default=None)),
        ('-o', '--index', dict(help='Output Database Index', required=True)),
        ('-d', '--database', dict(help='Output Database', required=True)),
        ('-g', '--log', dict(help="Log file", required=True)),
    )
    for shortFlag, longFlag, settings in optionTable:
        cli.add_argument(shortFlag, longFlag, **settings)
    main(cli.parse_args())