changeset 1:677344fb75c1 draft

Update the wrapper to handle output log and tree files better: parse the input .xml file for the output file names, and fall back to globbing the working directory when the XML yields none.
author malex
date Thu, 26 Apr 2012 11:08:35 -0400
parents 3ffe0202fa38
children 1914e55fee82
files beast/beast.py beast/beast.xml
diffstat 2 files changed, 59 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/beast/beast.py	Tue Apr 17 10:52:00 2012 -0400
+++ b/beast/beast.py	Thu Apr 26 11:08:35 2012 -0400
@@ -14,11 +14,23 @@
     A variable number of '.tree' files depending on the XML input
 """
 import os, shutil, subprocess, sys, optparse, glob, string
+from xml.dom.minidom import parse, Node
 
 def stop_err(msg):
     sys.stderr.write("%s\n" % msg)
     sys.exit()
 
+def parseFnames(nodelist):
+    filenames = []
+    for node in nodelist:
+        if node.hasAttributes():
+            fname = node.getAttribute('fileName')
+            if fname != "":
+                filenames.append(fname)
+        else:
+            pass
+    return filenames
+
 def __main__():
     usage = "usage: %prog inputXML"
     parser = optparse.OptionParser(usage = usage)
@@ -93,9 +105,41 @@
     else:
         sys.stdout.write(stdout)
         sys.stdout.write(stderr)
+#2012-04-24 - 2nd approach, parse the .xml file:
+    xml_file = os.path.abspath(inputxml)
+    if not os.path.exists(inputxml):
+        sys.stderr.write("Cannot find the input XML file for parsing.\n")
+    dom = parse(inputxml)
+    xml_logs = dom.getElementsByTagName('log')
+    xml_trees = dom.getElementsByTagName('logTree')
+    logfiles_orig = parseFnames(xml_logs)
+    treefiles_orig = parseFnames(xml_trees)
     try:
-        for logfile in glob.glob('*.log'):
-            shutil.copyfile(os.path.basename(logfile), 'beast.log')
+        if len(logfiles_orig) == 0:
+            logfiles_orig = glob.glob("*.log*")
+            if len(logfiles_orig) == 0:
+                logfiles_orig.append('Error_no_log')
+                dummy_file = open('Error_no_log','w')
+                dummy_file.write("BEAST run has not produced a log or it's named in such a way that I can't locate it. Configure BEAST to produce .log files without spaces in their names and rerun the analysis.\n")
+                dummy_file.close()
+        logfiles = []
+        if os.path.isdir(newfilepath):
+            for filename in logfiles_orig:
+                if os.path.isfile(filename):
+                    name = string.replace(os.path.splitext(filename)[0], "_", "-")
+                    filestring = "primary_%s_%s_visible_nexus" % (treefile_id, name)
+                    newpath = os.path.join(newfilepath,filestring)
+                    logfiles.append(newpath)
+#                else:
+#                    sys.stderr.write("Can't find the log file to rename.\n")
+        logfiles[0] = logs
+        for i in range(len(logfiles_orig)):
+            src = logfiles_orig[i]
+            dst = logfiles[i]
+            if os.path.exists(src):
+                shutil.copy(src, dst)
+#            else:
+#                print "File '%s' can't be found.\n" % src
     except Exception, err:
         sys.stderr.write("Error copying log file: \n%s\n" % err)
     try:
@@ -106,12 +150,15 @@
     except Exception, err:
         sys.stderr.write("Error copying mcmc.operators file: \n%s\n" % err)
     try:
-        treefiles_orig = glob.glob('*.trees')
         if len(treefiles_orig) == 0:
-            treefiles_orig.append('Error_no_tree')
-            dummy_file = open('Error_no_tree','w')
-            dummy_file.write("BEAST run has not produced an output tree\n")
-            dummy_file.close()
+            print "No tree files found by the xml file parser.\n"
+            treefiles_orig = glob.glob("*.trees*")
+#                print "Original tree files from the directory:\n\t%s" % " ".join(treefiles_orig)
+            if len(treefiles_orig) == 0:
+                treefiles_orig.append('Error_no_tree')
+                dummy_file = open('Error_no_tree','w')
+                dummy_file.write("BEAST run has not produced an output tree or it's named in such a way that I can't locate it. Configure BEAST to produce .tree files without spaces in their names and rerun the analysis.\n")
+                dummy_file.close()
         treefiles = []
         if os.path.isdir(newfilepath):
             for filename in treefiles_orig:
@@ -122,7 +169,10 @@
                     treefiles.append(newpath)
         treefiles[0] = trees
         for i in range(len(treefiles_orig)):
-            shutil.move(treefiles_orig[i], treefiles[i])
+            src = treefiles_orig[i]
+            dst = treefiles[i]
+            if os.path.exists(src):
+                shutil.copy(src, dst)
     except Exception, err:
         sys.stderr.write("Error copying trees file(s): \n%s\n" % err)
 if __name__=="__main__": __main__()
--- a/beast/beast.xml	Tue Apr 17 10:52:00 2012 -0400
+++ b/beast/beast.xml	Thu Apr 26 11:08:35 2012 -0400
@@ -22,7 +22,7 @@
 <help>
 .. class:: warningmark
 
-The input dataset needs to be in BEAST XML format.
+The input dataset needs to be in BEAST XML format. The names of the log output files configured in the xml file should have the '.log' extension and the trees file(s) should have the '.tree' extension for the best presentation.
 
 If the random seed is not chosen "12345" will be used.