view dbkit_create.py @ 2:81c7d4668a7e draft

"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author guerler
date Wed, 16 Dec 2020 12:02:48 +0000
parents 987e55ea29b8
children 03e124ff7e26
line wrap: on
line source

#! /usr/bin/env python3
import argparse
import shutil
import subprocess
from os import remove, system
from os.path import isfile, getsize


def getIdentifiers(args):
    """Read the entry list and return the formatted, de-duplicated identifiers.

    The first whitespace-separated token of every non-blank line of the file
    ``args.list`` is taken as the raw identifier and then formatted:

    * truncated to ``args.idlength`` characters when that value is > 0,
    * converted to lower/upper case when ``args.idcase`` is ``"lower"`` or
      ``"upper"`` (any other value leaves the case untouched),
    * suffixed with ``"." + args.idextension`` when an extension is given.

    Args:
        args: Namespace-like object providing ``list`` (path of the list
            file), ``idlength`` (int or numeric string), ``idcase`` and
            ``idextension``.

    Returns:
        Sorted list of the unique formatted identifiers.

    Raises:
        ValueError: If ``args.idlength`` cannot be converted to an integer.
        OSError: If the list file cannot be opened.
    """
    # Loop-invariant: parse the requested length once, not once per line.
    idLength = int(args.idlength)
    entries = set()
    with open(args.list) as file:
        for line in file:
            tokens = line.split()
            if not tokens:
                # Blank or whitespace-only line: there is no identifier on it.
                continue
            entry = tokens[0]
            if idLength > 0:
                entry = entry[:idLength]
            if args.idcase == "lower":
                entry = entry.lower()
            elif args.idcase == "upper":
                entry = entry.upper()
            if args.idextension:
                entry = "%s.%s" % (entry, args.idextension)
            entries.add(entry)
    return sorted(entries)


def _removeIfExists(fileName):
    """Delete ``fileName``, ignoring the case where it does not exist."""
    try:
        remove(fileName)
    except FileNotFoundError:
        pass


def _downloadEntry(url, entryId, fileName, logFile):
    """Fetch ``url + entryId`` into ``fileName`` with wget, logging launch errors."""
    # Drop the previous download first so a fetch that fails before wget
    # creates its output can never be mistaken for this entry's content.
    _removeIfExists(fileName)
    try:
        # Argument list instead of a shell string: the url and identifier are
        # passed verbatim and cannot be interpreted by a shell.
        subprocess.run(["wget", "-q", "-O", fileName, "%s%s" % (url, entryId)])
    except OSError as error:
        # wget itself could not be started; the caller then reports the
        # missing content for this entry.
        logFile.write("Download failed for `%s`: %s.\n" % (entryId, error))


def _appendFile(sourceName, targetName):
    """Append the bytes of ``sourceName`` to the end of ``targetName``."""
    with open(sourceName, "rb") as source, open(targetName, "ab") as target:
        shutil.copyfileobj(source, target)


def main(args):
    """Merge all listed entries into one database file and write its index.

    The identifiers come from ``getIdentifiers(args)``. Each entry is either
    downloaded from ``args.url + entryId`` (when ``args.url`` is set) or read
    from ``args.path/entryId``. Every non-empty entry is appended to
    ``args.database`` and recorded in ``args.index`` as a tab-separated line
    ``entryId, start offset, size``. Progress and missing entries are written
    to ``args.log``.

    Args:
        args: Namespace-like object providing ``list``, ``url``, ``path``,
            ``idlength``, ``idcase``, ``idextension``, ``index``, ``database``
            and ``log``.

    Raises:
        ValueError: If neither ``args.url`` nor ``args.path`` is provided.
        OSError: If an output file cannot be written.
    """
    if not args.url and args.path is None:
        raise ValueError("Either a source url or a path to files is required.")
    entries = getIdentifiers(args)
    outputIndex = args.index
    outputDatabase = args.database
    tempName = "temp.dat"
    # Only needed in path mode; trailing slashes are dropped once up front.
    pathName = None if args.url else args.path.rstrip("/")
    # Context managers guarantee that both the log and the index are flushed
    # and closed, even when processing stops with an exception.
    with open(args.log, "w") as logFile, open(outputIndex, "w") as indexFile:
        logFile.write("Found %s entries.\n" % len(entries))
        # Start from a clean database; entries are appended one by one below.
        _removeIfExists(outputDatabase)
        start = 0
        for entryId in entries:
            logFile.write("Loading %s.\n" % entryId)
            if args.url:
                fileName = tempName
                _downloadEntry(args.url, entryId, fileName, logFile)
            else:
                fileName = "%s/%s" % (pathName, entryId)
            if isfile(fileName):
                size = getsize(fileName)
                if size == 0:
                    logFile.write("Entry `%s` not found.\n" % entryId)
                else:
                    indexFile.write("%s\t%d\t%d\n" % (entryId, start, size))
                    start = start + size
                    _appendFile(fileName, outputDatabase)
            else:
                logFile.write("Content not found: %s.\n" % fileName)
            # Keep the log current so progress is visible while running.
            logFile.flush()
        if args.url:
            # Do not leave the scratch download behind.
            _removeIfExists(tempName)


if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser(description='DBKit - Download and Merge files into a single file.')
    # Input: list of entries plus exactly where their content comes from.
    parser.add_argument('-l', '--list', help='List of entries', required=True)
    parser.add_argument('-u', '--url', help='Source Url', required=False)
    parser.add_argument('-p', '--path', help='Path to files', required=False)
    # Identifier formatting applied to every entry of the list.
    parser.add_argument('-il', '--idlength', help='Format Identifier Length (integer)', required=False, default="0")
    parser.add_argument('-ic', '--idcase', help='Format Identifier Case (lower, upper)', required=False, default=None)
    parser.add_argument('-ie', '--idextension', help='Format Identifier Extension', required=False, default=None)
    # Outputs: index, merged database and log file.
    parser.add_argument('-o', '--index', help='Output Database Index', required=True)
    parser.add_argument('-d', '--database', help='Output Database', required=True)
    parser.add_argument('-g', '--log', help="Log file", required=True)
    args = parser.parse_args()
    # main() reads entries from the url when given and from the path
    # otherwise; with neither it would fail on the first entry, so report
    # the problem as a regular usage error instead of a traceback.
    if not args.url and args.path is None:
        parser.error("one of the arguments -u/--url or -p/--path is required")
    main(args)