diff dbkit_create.py @ 0:1914107cc967 draft

"planemo upload commit 3632646bec5edbe47e06c894e32bfd215b895555-dirty"
author guerler
date Wed, 25 Nov 2020 17:22:48 +0000
parents
children 987e55ea29b8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/dbkit_create.py	Wed Nov 25 17:22:48 2020 +0000
@@ -0,0 +1,63 @@
+#! /usr/bin/env python3
+import argparse
+from os import system
+from os.path import getsize
+
+
+def getIdentifiers(args):
+    """Read the entry list and return the formatted, de-duplicated identifiers.
+
+    The first whitespace-separated token of every line in ``args.list`` is
+    taken as the raw identifier and is then formatted in this order:
+
+    1. truncated to ``args.idlength`` characters when that value is > 0,
+    2. lower- or upper-cased when ``args.idcase`` is "lower" or "upper",
+    3. suffixed with ``.<args.idextension>`` when an extension is given.
+
+    Blank or whitespace-only lines are skipped.
+
+    Args:
+        args: Object providing the attributes ``list`` (path of the entry
+            list), ``idlength``, ``idcase`` and ``idextension``.
+
+    Returns:
+        Sorted list of the unique formatted identifiers.
+
+    Raises:
+        ValueError: If ``args.idlength`` cannot be converted to an integer.
+        OSError: If the entry list cannot be opened.
+    """
+    # The length limit does not depend on the line, so parse it only once.
+    idLength = int(args.idlength)
+    entries = set()
+    with open(args.list) as file:
+        for line in file:
+            tokens = line.split()
+            if not tokens:
+                # An empty line has no first token; indexing it would raise.
+                continue
+            entry = tokens[0]
+            if idLength > 0:
+                entry = entry[:idLength]
+            if args.idcase == "lower":
+                entry = entry.lower()
+            elif args.idcase == "upper":
+                entry = entry.upper()
+            if args.idextension is not None:
+                entry = "%s.%s" % (entry, args.idextension)
+            entries.add(entry)
+    return sorted(entries)
+
+
+def main(args):
+    """Download every listed entry and merge the files into one database.
+
+    Each identifier returned by ``getIdentifiers`` is fetched with ``wget``
+    from ``args.url + identifier`` into ``<args.temp>/temp.pdb``. A non-empty
+    download is appended to ``args.database`` and recorded in ``args.index``
+    as a tab-separated line ``identifier, start, size``; an empty download is
+    reported as not found. Progress messages are written to ``args.log``.
+
+    External commands are invoked with an argument list instead of a shell
+    string, so URLs, identifiers and paths containing shell metacharacters
+    or spaces are passed through literally.
+
+    Args:
+        args: Object providing the attributes ``list``, ``url``, ``temp``,
+            ``index``, ``database`` and ``log`` plus the identifier
+            formatting options read by ``getIdentifiers``.
+
+    Raises:
+        OSError: If an output file cannot be opened, ``wget`` cannot be
+            started, or the temporary download file does not exist.
+    """
+    import os
+    import shutil
+    import subprocess
+
+    entries = getIdentifiers(args)
+    outputIndex = args.index
+    outputDatabase = args.database
+    tempPath = args.temp.rstrip("/")
+    tempFile = "%s/temp.pdb" % tempPath
+    with open(args.log, "w") as logFile:
+        logFile.write("Found %s entries.\n" % len(entries))
+        # tempPath is empty when args.temp consists only of slashes.
+        if tempPath:
+            os.makedirs(tempPath, exist_ok=True)
+        # Start from a clean database; a missing file is not an error.
+        try:
+            os.remove(outputDatabase)
+        except FileNotFoundError:
+            pass
+        with open(outputIndex, "w") as indexFile:
+            start = 0
+            for entryId in entries:
+                logFile.write("Loading %s.\n" % entryId)
+                # The exit status is ignored on purpose: a failed download is
+                # detected below through the size of the temporary file.
+                subprocess.run(["wget", "-q", "-O", tempFile, "%s%s" % (args.url, entryId)])
+                tempSize = getsize(tempFile)
+                if tempSize == 0:
+                    logFile.write("Entry `%s` not found.\n" % entryId)
+                else:
+                    indexFile.write("%s\t%d\t%d\n" % (entryId, start, tempSize))
+                    # NOTE(review): the download is appended without any
+                    # separator byte, so the "+ 1" looks like it shifts each
+                    # later offset by one extra byte. Kept unchanged; confirm
+                    # against the code that reads the index before fixing.
+                    start = start + tempSize + 1
+                    # Append in binary mode so the bytes are copied verbatim.
+                    with open(tempFile, "rb") as source, open(outputDatabase, "ab") as target:
+                        shutil.copyfileobj(source, target)
+                # Flush per entry so the log can be followed during long runs.
+                logFile.flush()
+
+
+if __name__ == "__main__":
+    # Command-line entry point: declare the options, parse them and hand the
+    # resulting namespace to main().
+    parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
+    # One row per option: (short flag, long flag, help text, extra settings).
+    optionTable = [
+        ('-l', '--list', 'List of entries', {'required': True}),
+        ('-u', '--url', 'Source Url', {'required': True}),
+        ('-t', '--temp', 'temp', {'required': True}),
+        ('-il', '--idlength', 'Format Identifier Length (integer)', {'required': False, 'default': "0"}),
+        ('-ic', '--idcase', 'Format Identifier Case (lower, upper)', {'required': False, 'default': None}),
+        ('-ie', '--idextension', 'Format Identifier Extension', {'required': False, 'default': None}),
+        ('-o', '--index', 'Output Database Index', {'required': True}),
+        ('-d', '--database', 'Output Database', {'required': True}),
+        ('-g', '--log', "Log file", {'required': True}),
+    ]
+    for shortFlag, longFlag, helpText, settings in optionTable:
+        parser.add_argument(shortFlag, longFlag, help=helpText, **settings)
+    args = parser.parse_args()
+    main(args)