changeset 3:03e124ff7e26 draft

"planemo upload commit bd03b7888eab0b010acfc3affd38bf4d4e2bb1ef-dirty"
author guerler
date Wed, 16 Dec 2020 13:11:35 +0000
parents 81c7d4668a7e
children 58de7c3926cc
files dbkit_create.py dbkit_create.xml dbkit_extract.py dbkit_merge.py dbkit_package/DBKit.py temp.dat test-data/create/pdb.tabular
diffstat 6 files changed, 60 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/dbkit_create.py	Wed Dec 16 12:02:48 2020 +0000
+++ b/dbkit_create.py	Wed Dec 16 13:11:35 2020 +0000
@@ -1,7 +1,10 @@
 #! /usr/bin/env python3
 import argparse
-from os import system
+from os import remove
 from os.path import isfile, getsize
+import wget
+
+from dbkit_package.DBKit import writeEntry
 
 
 def getIdentifiers(args):
@@ -28,25 +31,21 @@
     logFile.write("Found %s entries.\n" % len(entries))
     outputIndex = args.index
     outputDatabase = args.database
-    system("rm -f %s" % outputDatabase)
-    indexFile = open(outputIndex, 'w')
-    start = 0
+    if isfile(outputDatabase):
+        remove(outputDatabase)
     for entryId in entries:
         logFile.write("Loading %s.\n" % entryId)
         if args.url:
-            fileName = "temp.dat"
-            system("wget -q -O %s %s%s" % (fileName, args.url, entryId))
+            fileName = wget.download("%s%s" % (args.url, entryId))
         else:
             pathName = args.path.rstrip("/")
             fileName = "%s/%s" % (pathName, entryId)
         if isfile(fileName):
-            size = getsize(fileName)
-            if size == 0:
+            entrySize = getsize(fileName)
+            if entrySize == 0:
                 logFile.write("Entry `%s` not found.\n" % entryId)
             else:
-                indexFile.write("%s\t%d\t%d\n" % (entryId, start, size))
-                start = start + size
-                system("cat %s >> %s" % (fileName, outputDatabase))
+                writeEntry(entryId, fileName, outputIndex, outputDatabase)
         else:
             logFile.write("Content not found: %s.\n" % fileName)
         logFile.flush()
--- a/dbkit_create.xml	Wed Dec 16 12:02:48 2020 +0000
+++ b/dbkit_create.xml	Wed Dec 16 13:11:35 2020 +0000
@@ -1,5 +1,8 @@
 <tool id="dbkit_create" name="DBKit Create" version="0.1.1" python_template_version="3.5" license="MIT">
-    <description>database generator</description>
+    <description>database</description>
+     <requirements>
+        <requirement type="package" version="3.2">python-wget</requirement>
+    </requirements>
     <command detect_errors="exit_code"><![CDATA[
         #if str($source.type) == "false":
             mkdir -p inputs &&
--- a/dbkit_extract.py	Wed Dec 16 12:02:48 2020 +0000
+++ b/dbkit_extract.py	Wed Dec 16 13:11:35 2020 +0000
@@ -1,31 +1,26 @@
 #! /usr/bin/env python3
 import argparse
-from os import system
-from os.path import getsize
 
-from dbkit_package.DBKit import DBKit
+from dbkit_package.DBKit import DBKit, writeEntry
 
 
 def main(args):
     logFile = open(args.log, "w")
-    outIndex = args.outindex
-    outData = args.outdata
+    outputIndex = args.outputindex
+    outputDatabase = args.outputdatabase
     entries = list()
     with open(args.list, "r") as f:
         for line in f:
             name = line.split()[0]
             entries.append(name)
     logFile.write("Detected %s entries.\n" % len(entries))
-    tempFile = "temp.dat"
+    fileName = "temp.dat"
     count = 0
     dbkit = DBKit(args.index, args.database)
     for entry in sorted(entries):
-        success = dbkit.createFile(entry, tempFile)
+        success = dbkit.createFile(entry, fileName)
         if success:
-            currentSize = getsize(outData)
-            entrySize = getsize(tempFile)
-            system("cat %s >> %s" % (tempFile, outData))
-            system("echo '%s\t%s\t%s' >> %s" % (entry, currentSize, entrySize, outIndex))
+            writeEntry(entry, fileName, outputIndex, outputDatabase)
             count = count + 1
         else:
             logFile.write("Entry %s not found.\n" % entry)
@@ -38,8 +33,8 @@
     parser.add_argument('-l', '--list', help='List of entries to be extracted', required=True)
     parser.add_argument('-i', '--index', help='Database Index file (ffindex)', required=True)
     parser.add_argument('-d', '--database', help='Database Data file (ffdata)', required=True)
-    parser.add_argument('-oi', '--outindex', help='Output Index file', required=True)
-    parser.add_argument('-od', '--outdata', help='Output Data file', required=True)
+    parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True)
+    parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True)
     parser.add_argument('-g', '--log', help='Log file', required=True)
     args = parser.parse_args()
     main(args)
--- a/dbkit_merge.py	Wed Dec 16 12:02:48 2020 +0000
+++ b/dbkit_merge.py	Wed Dec 16 13:11:35 2020 +0000
@@ -1,15 +1,15 @@
 #! /usr/bin/env python3
 import argparse
-from os import system
 from os.path import getsize
+from shutil import copyfile
 
-from dbkit_package.DBKit import DBKit
+from dbkit_package.DBKit import DBKit, writeEntry
 
 
 def main(args):
     logFile = open(args.log, "w")
-    outIndex = args.outindex
-    outData = args.outdata
+    outputIndex = args.outputindex
+    outputDatabase = args.outputdatabase
     if getsize(args.firstindex) > getsize(args.secondindex):
         firstIndex = args.firstindex
         firstData = args.firstdata
@@ -20,8 +20,8 @@
         firstData = args.seconddata
         secondIndex = args.firstindex
         secondData = args.firstdata
-    system("cp %s %s" % (firstIndex, outIndex))
-    system("cp %s %s" % (firstData, outData))
+    copyfile(firstIndex, outputIndex)
+    copyfile(firstData, outputDatabase)
     firstEntries = set()
     with open(firstIndex, "r") as f:
         for line in f:
@@ -33,16 +33,13 @@
         for line in f:
             name = line.split()[0]
             secondEntries.append(name)
-    tempFile = "temp.dat"
+    fileName = "temp.dat"
     count = 0
     dbkit = DBKit(secondIndex, secondData)
     for secondKey in secondEntries:
         if secondKey not in firstEntries:
-            dbkit.createFile(secondKey, tempFile)
-            entrySize = getsize(tempFile)
-            currentSize = getsize(outData)
-            system("cat %s >> %s" % (tempFile, outData))
-            system("echo '%s\t%s\t%s' >> %s" % (secondKey, currentSize, entrySize, outIndex))
+            dbkit.createFile(secondKey, fileName)
+            writeEntry(secondKey, fileName, outputIndex, outputDatabase)
             count = count + 1
         else:
             logFile.write("Skipping existing entry %s.\n" % secondKey)
@@ -56,8 +53,8 @@
     parser.add_argument('-fd', '--firstdata', help='First Data file', required=True)
     parser.add_argument('-si', '--secondindex', help='Second Index file', required=True)
     parser.add_argument('-sd', '--seconddata', help='Second Data file', required=True)
-    parser.add_argument('-oi', '--outindex', help='Output Index file', required=True)
-    parser.add_argument('-od', '--outdata', help='Output Data file', required=True)
+    parser.add_argument('-oi', '--outputindex', help='Output Index file', required=True)
+    parser.add_argument('-od', '--outputdatabase', help='Output Data file', required=True)
     parser.add_argument('-log', '--log', help='Log file', required=True)
     args = parser.parse_args()
     main(args)
--- a/dbkit_package/DBKit.py	Wed Dec 16 12:02:48 2020 +0000
+++ b/dbkit_package/DBKit.py	Wed Dec 16 13:11:35 2020 +0000
@@ -1,3 +1,6 @@
+from os.path import isfile, getsize
+
+
 class DBKit:
     def __init__(self, indexFile, databaseFile):
         self.databaseFile = databaseFile
@@ -30,3 +33,26 @@
 
     def getIndex(self):
         return self.index
+
+
+def writeEntry(identifier, fileName, outputIndex, outputDatabase):
+    if isfile(outputDatabase):
+        currentSize = getsize(outputDatabase)
+    else:
+        currentSize = 0
+    if isfile(fileName):
+        entrySize = getsize(fileName)
+    else:
+        entrySize = 0
+    if entrySize > 0:
+        outputIndexFile = open(outputIndex, "a+")
+        outputIndexFile.write("%s\t%s\t%s\n" % (identifier, currentSize, entrySize))
+        tempFile = open(fileName, "r")
+        databaseFile = open(outputDatabase, "a+")
+        databaseFile.write(tempFile.read())
+        databaseFile.close()
+        tempFile.close()
+        outputIndexFile.close()
+        return True
+    else:
+        return False
--- a/test-data/create/pdb.tabular	Wed Dec 16 12:02:48 2020 +0000
+++ b/test-data/create/pdb.tabular	Wed Dec 16 13:11:35 2020 +0000
@@ -1,4 +1,5 @@
 10gs.pdb
+none.pdb
 117e.pdb
 11as.pdb
 11ba.pdb