diff commons/launcher/LaunchRefalign_old.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/launcher/LaunchRefalign_old.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import getopt
+
+import pyRepet.launcher.programLauncher
+from commons.core.seq.BioseqDB import BioseqDB
+from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
+
+
+def help():
+    print
+    print "usage: launchRefalign.py [ options ]"
+    print "options:"
+    print "     -h: this help"
+    print "     -i: name of the input file (refseq is first, format='fasta')"
+    print "     -r: keep the reference sequence"
+    print "     -o: name of the output file (default=inFileName+'.fa_aln')"
+    print "     -v: verbose (default=0)"
+    print
+
+
+def main():
+    """
+    Launch 'refalign' to build a master-slave multiple sequence alignment.
+    """
+    inFileName = ""
+    keepRefseq = False
+    outFileName = ""
+    verbose = 0
+    try:
+        opts,args=getopt.getopt(sys.argv[1:],"hi:ro:v:")
+    except getopt.GetoptError, err:
+            print str(err); help(); sys.exit(1)
+    for o,a in opts:
+        if o == "-h":
+            help()
+            sys.exit(0)
+        elif o == "-i":
+            inFileName = a
+        elif o == "-r":
+            keepRefseq = True
+        elif o == "-o":
+            outFileName = a
+        elif o == "-v":
+            verbose = int(a)
+    if inFileName == "":
+        print "ERROR: missing compulsory options"
+        help()
+        sys.exit(1)
+        
+    if verbose > 0:
+        print "START %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    if verbose > 0:
+        print "build a multiple alignment from '%s'..." % ( inFileName )
+        sys.stdout.flush()
+        
+    if outFileName == "":
+        outFileName = "%s.fa_aln" % ( inFileName )
+        
+    csh = ChangeSequenceHeaders()
+    csh.setInputFile( inFileName )
+    csh.setFormat( "fasta" )
+    csh.setStep( 1 )
+    csh.setPrefix( "seq" )
+    csh.setLinkFile(  inFileName+".shortHlink" )
+    csh.setOutputFile( inFileName+".shortH" )
+    csh.setVerbosityLevel( verbose - 1 )
+    csh.run()
+    
+    bsDB = BioseqDB( inFileName+".shortH" )
+    bsDB.upCase()
+    bsDB.save( inFileName+".shortHtmp" )
+    del bsDB
+    os.rename( inFileName+".shortHtmp", inFileName+".shortH" )
+    
+    pL = pyRepet.launcher.programLauncher.programLauncher( inFileName+".shortH" )
+    if keepRefseq:
+        pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", refseqName="seq1", verbose=verbose )
+    else:
+        pL.launchRefalign( outFileName=inFileName+".shortH.fa_aln", verbose=verbose )
+        
+    csh.setInputFile( inFileName+".shortH.fa_aln" )
+    csh.setFormat( "fasta" )
+    csh.setStep( 2 )
+    csh.setLinkFile(  inFileName+".shortHlink" )
+    csh.setOutputFile(  outFileName )
+    csh.setVerbosityLevel( verbose - 1 )
+    csh.run()
+    
+    for f in [ inFileName+".shortH", inFileName+".shortHlink", inFileName+".shortH.fa_aln" ]:
+            os.remove( f )
+            
+    if verbose > 0:
+        print "END %s" % (sys.argv[0].split("/")[-1])
+        sys.stdout.flush()
+        
+    return 0
+
+
+if __name__ == "__main__":
+    main()