comparison dbkit_create.py @ 1:987e55ea29b8 draft

"planemo upload commit ce9026535c3c6da5e97366a4f0b347b0ec572dbc-dirty"
author guerler
date Thu, 26 Nov 2020 11:05:51 +0000
parents 1914107cc967
children 81c7d4668a7e
comparison
legend: equal, deleted, inserted, replaced
left column: 0:1914107cc967 (parent) · right column: 1:987e55ea29b8 (this changeset)
1 #! /usr/bin/env python3 1 #! /usr/bin/env python3
2 import argparse 2 import argparse
3 from os import system 3 from os import system
4 from os.path import getsize 4 from os.path import isfile, getsize
5 5
6 6
7 def getIdentifiers(args): 7 def getIdentifiers(args):
8 entries = set() 8 entries = set()
9 with open(args.list) as file: 9 with open(args.list) as file:
26 entries = getIdentifiers(args) 26 entries = getIdentifiers(args)
27 logFile = open(args.log, "w") 27 logFile = open(args.log, "w")
28 logFile.write("Found %s entries.\n" % len(entries)) 28 logFile.write("Found %s entries.\n" % len(entries))
29 outputIndex = args.index 29 outputIndex = args.index
30 outputDatabase = args.database 30 outputDatabase = args.database
31 tempPath = args.temp.rstrip("/")
32 tempFile = "%s/temp.pdb" % tempPath
33 system("mkdir -p %s" % tempPath)
34 system("rm -f %s" % outputDatabase) 31 system("rm -f %s" % outputDatabase)
35 indexFile = open(outputIndex, 'w') 32 indexFile = open(outputIndex, 'w')
36 start = 0 33 start = 0
37 for entryId in entries: 34 for entryId in entries:
38 logFile.write("Loading %s.\n" % entryId) 35 logFile.write("Loading %s.\n" % entryId)
39 system("wget -q -O %s %s%s" % (tempFile, args.url, entryId)) 36 if args.url:
40 tempSize = getsize(tempFile) 37 fileName = "temp.dat"
41 if tempSize == 0: 38 system("wget -q -O %s %s%s" % (fileName, args.url, entryId))
42 logFile.write("Entry `%s` not found.\n" % entryId)
43 else: 39 else:
44 indexFile.write("%s\t%d\t%d\n" % (entryId, start, tempSize)) 40 pathName = args.path.rstrip("/")
45 start = start + tempSize + 1 41 fileName = "%s/%s" % (pathName, entryId)
46 system("cat %s >> %s" % (tempFile, outputDatabase)) 42 if isfile(fileName):
43 size = getsize(fileName)
44 if size == 0:
45 logFile.write("Entry `%s` not found.\n" % entryId)
46 else:
47 indexFile.write("%s\t%d\t%d\n" % (entryId, start, size))
48 start = start + size
49 system("cat %s >> %s" % (fileName, outputDatabase))
47 logFile.flush() 50 logFile.flush()
48 logFile.close() 51 logFile.close()
49 52
50 53
51 if __name__ == "__main__": 54 if __name__ == "__main__":
52 parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.') 55 parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
53 parser.add_argument('-l', '--list', help='List of entries', required=True) 56 parser.add_argument('-l', '--list', help='List of entries', required=True)
54 parser.add_argument('-u', '--url', help='Source Url', required=True) 57 parser.add_argument('-u', '--url', help='Source Url', required=False)
55 parser.add_argument('-t', '--temp', help='temp', required=True) 58 parser.add_argument('-p', '--path', help='Path to files', required=False)
56 parser.add_argument('-il', '--idlength', help='Format Identifier Length (integer)', required=False, default="0") 59 parser.add_argument('-il', '--idlength', help='Format Identifier Length (integer)', required=False, default="0")
57 parser.add_argument('-ic', '--idcase', help='Format Identifier Case (lower, upper)', required=False, default=None) 60 parser.add_argument('-ic', '--idcase', help='Format Identifier Case (lower, upper)', required=False, default=None)
58 parser.add_argument('-ie', '--idextension', help='Format Identifier Extension', required=False, default=None) 61 parser.add_argument('-ie', '--idextension', help='Format Identifier Extension', required=False, default=None)
59 parser.add_argument('-o', '--index', help='Output Database Index', required=True) 62 parser.add_argument('-o', '--index', help='Output Database Index', required=True)
60 parser.add_argument('-d', '--database', help='Output Database', required=True) 63 parser.add_argument('-d', '--database', help='Output Database', required=True)