# HG changeset patch # User guerler # Date 1608124295 0 # Node ID 03e124ff7e2619914fec2581f980689cf101f43b # Parent 81c7d4668a7ee54eafdf4786aaeb4d22c3d1026d "planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty" diff -r 81c7d4668a7e -r 03e124ff7e26 dbkit_create.py --- a/dbkit_create.py Wed Dec 16 12:02:48 2020 +0000 +++ b/dbkit_create.py Wed Dec 16 13:11:35 2020 +0000 @@ -1,7 +1,10 @@ #! /usr/bin/env python3 import argparse -from os import system +from os import remove from os.path import isfile, getsize +import wget + +from dbkit_package.DBKit import writeEntry def getIdentifiers(args): @@ -28,25 +31,21 @@ logFile.write("Found %s entries.\n" % len(entries)) outputIndex = args.index outputDatabase = args.database - system("rm -f %s" % outputDatabase) - indexFile = open(outputIndex, 'w') - start = 0 + if isfile(outputDatabase): + remove(outputDatabase) for entryId in entries: logFile.write("Loading %s.\n" % entryId) if args.url: - fileName = "temp.dat" - system("wget -q -O %s %s%s" % (fileName, args.url, entryId)) + fileName = wget.download("%s%s" % (args.url, entryId)) else: pathName = args.path.rstrip("/") fileName = "%s/%s" % (pathName, entryId) if isfile(fileName): - size = getsize(fileName) - if size == 0: + entrySize = getsize(fileName) + if entrySize == 0: logFile.write("Entry `%s` not found.\n" % entryId) else: - indexFile.write("%s\t%d\t%d\n" % (entryId, start, size)) - start = start + size - system("cat %s >> %s" % (fileName, outputDatabase)) + writeEntry(entryId, fileName, outputIndex, outputDatabase) else: logFile.write("Content not found: %s.\n" % fileName) logFile.flush() diff -r 81c7d4668a7e -r 03e124ff7e26 dbkit_create.xml --- a/dbkit_create.xml Wed Dec 16 12:02:48 2020 +0000 +++ b/dbkit_create.xml Wed Dec 16 13:11:35 2020 +0000 @@ -1,5 +1,8 @@ - database generator + database + + python-wget + > %s" % (tempFile, outData)) - system("echo '%s\t%s\t%s' >> %s" % (entry, currentSize, entrySize, outIndex)) + writeEntry(entry, fileName, outputIndex, outputDatabase) count = count + 1 else: logFile.write("Entry %s not found.\n" % entry) @@ -38,8 +33,8 @@ parser.add_argument('-l', '--list', help='List of entries to be extracted', required=True) parser.add_argument('-i', '--index', help='Database Index file (ffindex)', required=True) parser.add_argument('-d', '--database', help='Database Data file (ffdata)', required=True) - parser.add_argument('-oi', '--outindex', help='Output Index file', required=True) - parser.add_argument('-od', '--outdata', help='Output Data file', required=True) + parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True) + parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True) parser.add_argument('-g', '--log', help='Log file', required=True) args = parser.parse_args() main(args) diff -r 81c7d4668a7e -r 03e124ff7e26 dbkit_merge.py --- a/dbkit_merge.py Wed Dec 16 12:02:48 2020 +0000 +++ b/dbkit_merge.py Wed Dec 16 13:11:35 2020 +0000 @@ -1,15 +1,15 @@ #! /usr/bin/env python3 import argparse -from os import system from os.path import getsize +from shutil import copyfile -from dbkit_package.DBKit import DBKit +from dbkit_package.DBKit import DBKit, writeEntry def main(args): logFile = open(args.log, "w") - outIndex = args.outindex - outData = args.outdata + outputIndex = args.outputindex + outputDatabase = args.outputdatabase if getsize(args.firstindex) > getsize(args.secondindex): firstIndex = args.firstindex firstData = args.firstdata @@ -20,8 +20,8 @@ firstData = args.seconddata secondIndex = args.firstindex secondData = args.firstdata - system("cp %s %s" % (firstIndex, outIndex)) - system("cp %s %s" % (firstData, outData)) + copyfile(firstIndex, outputIndex) + copyfile(firstData, outputDatabase) firstEntries = set() with open(firstIndex, "r") as f: for line in f: @@ -33,16 +33,13 @@ for line in f: name = line.split()[0] secondEntries.append(name) - tempFile = "temp.dat" + fileName = "temp.dat" count = 0 dbkit = DBKit(secondIndex, secondData) for secondKey in secondEntries: if secondKey not in firstEntries: - dbkit.createFile(secondKey, tempFile) - entrySize = getsize(tempFile) - currentSize = getsize(outData) - system("cat %s >> %s" % (tempFile, outData)) - system("echo '%s\t%s\t%s' >> %s" % (secondKey, currentSize, entrySize, outIndex)) + dbkit.createFile(secondKey, fileName) + writeEntry(secondKey, fileName, outputIndex, outputDatabase) count = count + 1 else: logFile.write("Skipping existing entry %s.\n" % secondKey) @@ -56,8 +53,8 @@ parser.add_argument('-fd', '--firstdata', help='First Data file', required=True) parser.add_argument('-si', '--secondindex', help='Second Index file', required=True) parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True) - parser.add_argument('-oi', '--outindex', help='Output Index file', required=True) - parser.add_argument('-od', '--outdata', help='Output Data file', required=True) + parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True) + parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True) parser.add_argument('-log', '--log', help='Log file', required=True) args = parser.parse_args() main(args) diff -r 81c7d4668a7e -r 03e124ff7e26 dbkit_package/DBKit.py --- a/dbkit_package/DBKit.py Wed Dec 16 12:02:48 2020 +0000 +++ b/dbkit_package/DBKit.py Wed Dec 16 13:11:35 2020 +0000 @@ -1,3 +1,6 @@ +from os.path import isfile, getsize + + class DBKit: def __init__(self, indexFile, databaseFile): self.databaseFile = databaseFile @@ -30,3 +33,26 @@ def getIndex(self): return self.index + + +def writeEntry(identifier, fileName, outputIndex, outputDatabase): + if isfile(outputDatabase): + currentSize = getsize(outputDatabase) + else: + currentSize = 0 + if isfile(fileName): + entrySize = getsize(fileName) + else: + entrySize = 0 + if entrySize > 0: + outputIndexFile = open(outputIndex, "a+") + outputIndexFile.write("%s\t%s\t%s\n" % (identifier, currentSize, entrySize)) + tempFile = open(fileName, "r") + databaseFile = open(outputDatabase, "a+") + databaseFile.write(tempFile.read()) + databaseFile.close() + tempFile.close() + outputIndexFile.close() + return True + else: + return False diff -r 81c7d4668a7e -r 03e124ff7e26 temp.dat diff -r 81c7d4668a7e -r 03e124ff7e26 test-data/create/pdb.tabular --- a/test-data/create/pdb.tabular Wed Dec 16 12:02:48 2020 +0000 +++ b/test-data/create/pdb.tabular Wed Dec 16 13:11:35 2020 +0000 @@ -1,4 +1,5 @@ 10gs.pdb +none.pdb 117e.pdb 11as.pdb 11ba.pdb