Mercurial > repos > guerler > dbkit
view dbkit_merge.py @ 4:58de7c3926cc draft
"planemo upload commit c6112b37070456c582e915244105c4c63f4cebdb"
author | guerler |
---|---|
date | Mon, 25 Jan 2021 04:23:15 +0000 |
parents | 03e124ff7e26 |
children |
line wrap: on
line source
#! /usr/bin/env python3 import argparse from os.path import getsize from shutil import copyfile from dbkit_package.DBKit import DBKit, writeEntry def main(args): logFile = open(args.log, "w") outputIndex = args.outputindex outputDatabase = args.outputdatabase if getsize(args.firstindex) > getsize(args.secondindex): firstIndex = args.firstindex firstData = args.firstdata secondIndex = args.secondindex secondData = args.seconddata else: firstIndex = args.secondindex firstData = args.seconddata secondIndex = args.firstindex secondData = args.firstdata copyfile(firstIndex, outputIndex) copyfile(firstData, outputDatabase) firstEntries = set() with open(firstIndex, "r") as f: for line in f: name = line.split()[0] firstEntries.add(name) logFile.write("Detected %s entries.\n" % len(firstEntries)) secondEntries = list() with open(secondIndex, "r") as f: for line in f: name = line.split()[0] secondEntries.append(name) fileName = "temp.dat" count = 0 dbkit = DBKit(secondIndex, secondData) for secondKey in secondEntries: if secondKey not in firstEntries: dbkit.createFile(secondKey, fileName) writeEntry(secondKey, fileName, outputIndex, outputDatabase) count = count + 1 else: logFile.write("Skipping existing entry %s.\n" % secondKey) logFile.write("Added %s entries.\n" % count) logFile.close() if __name__ == "__main__": parser = argparse.ArgumentParser(description='DBKit - Merge database pair.') parser.add_argument('-fi', '--firstindex', help='First Index file', required=True) parser.add_argument('-fd', '--firstdata', help='First Data file', required=True) parser.add_argument('-si', '--secondindex', help='Second Index file', required=True) parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True) parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True) parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True) parser.add_argument('-log', '--log', help='Log file', required=True) args = parser.parse_args() main(args)