18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 import os
|
|
4 import sys
|
|
5 import getopt
|
|
6
|
|
7 import pyRepet.launcher.programLauncher
|
|
8 from commons.core.seq.BioseqDB import BioseqDB
|
|
9 from commons.tools.ChangeSequenceHeaders import ChangeSequenceHeaders
|
|
10
|
|
11
|
|
def help():
    """
    Print the command-line usage of this script on standard output.

    The usage text is preceded and followed by one empty line.
    Note: this function shadows the builtin 'help' inside this module; the
    name is kept so that existing callers keep working.
    """
    usageLines = (
        "",
        "usage: launchRefalign.py [ options ]",
        "options:",
        " -h: this help",
        " -i: name of the input file (refseq is first, format='fasta')",
        " -r: keep the reference sequence",
        " -o: name of the output file (default=inFileName+'.fa_aln')",
        " -v: verbose (default=0)",
        "",
    )
    # One parenthesised argument is valid both as a Python 2 print statement
    # and as a Python 3 print() call, and yields the same bytes on stdout.
    print("\n".join(usageLines))
|
|
22
|
|
23
|
|
def main():
    """
    Launch 'refalign' to build a master-slave multiple sequence alignment.

    Options are read from sys.argv (see help()):
      -i  input fasta file (compulsory)
      -r  keep the reference sequence
      -o  output file (default: <input>.fa_aln)
      -v  verbosity level, an integer (default: 0)

    Steps:
      1. run ChangeSequenceHeaders (step 1, prefix 'seq') from the input
         file to '<input>.shortH', with link file '<input>.shortHlink';
      2. upper-case the sequences of '<input>.shortH' via BioseqDB;
      3. run programLauncher.launchRefalign on '<input>.shortH';
      4. run ChangeSequenceHeaders (step 2) from the alignment to the
         output file, using the same link file;
      5. remove the intermediate files.

    The process exits with status 1 on invalid or missing options, and with
    status 0 after printing the help when -h is given.

    @return: 0 on success
    """
    inFileName = ""
    keepRefseq = False
    outFileName = ""
    verbose = 0

    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hi:ro:v:")
    except getopt.GetoptError as err:
        print(str(err))
        help()
        sys.exit(1)

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFileName = a
        elif o == "-r":
            keepRefseq = True
        elif o == "-o":
            outFileName = a
        elif o == "-v":
            # Report a bad verbosity value like any other option error
            # instead of aborting with an uncaught ValueError.
            try:
                verbose = int(a)
            except ValueError:
                print("ERROR: option -v requires an integer (got '%s')" % a)
                help()
                sys.exit(1)

    if inFileName == "":
        print("ERROR: missing compulsory options")
        help()
        sys.exit(1)

    progName = os.path.basename(sys.argv[0])

    if verbose > 0:
        print("START %s" % progName)
        print("build a multiple alignment from '%s'..." % inFileName)
        sys.stdout.flush()

    if outFileName == "":
        outFileName = "%s.fa_aln" % inFileName

    # Intermediate files, all created next to the input file.
    shortHFile = inFileName + ".shortH"
    linkFile = inFileName + ".shortHlink"
    tmpFile = inFileName + ".shortHtmp"
    shortHAlnFile = inFileName + ".shortH.fa_aln"

    try:
        # NOTE(review): step 1 presumably replaces each header by
        # 'seq<N>' and records the mapping in the link file - confirm
        # against ChangeSequenceHeaders.
        csh = ChangeSequenceHeaders()
        csh.setInputFile(inFileName)
        csh.setFormat("fasta")
        csh.setStep(1)
        csh.setPrefix("seq")
        csh.setLinkFile(linkFile)
        csh.setOutputFile(shortHFile)
        csh.setVerbosityLevel(verbose - 1)
        csh.run()

        # Upper-case the sequences, writing to a temporary file which then
        # replaces the short-header file.
        bsDB = BioseqDB(shortHFile)
        bsDB.upCase()
        bsDB.save(tmpFile)
        del bsDB
        os.rename(tmpFile, shortHFile)

        pL = pyRepet.launcher.programLauncher.programLauncher(shortHFile)
        refalignArgs = {"outFileName": shortHAlnFile, "verbose": verbose}
        if keepRefseq:
            # The reference is the first input sequence (see help()), so it
            # is presumably named 'seq1' after step 1 - TODO confirm.
            refalignArgs["refseqName"] = "seq1"
        pL.launchRefalign(**refalignArgs)

        # NOTE(review): step 2 presumably restores the initial headers from
        # the link file - confirm against ChangeSequenceHeaders.
        csh.setInputFile(shortHAlnFile)
        csh.setFormat("fasta")
        csh.setStep(2)
        csh.setLinkFile(linkFile)
        csh.setOutputFile(outFileName)
        csh.setVerbosityLevel(verbose - 1)
        csh.run()
    finally:
        # Remove the intermediate files even when a step failed; skip the
        # ones never created so that the cleanup cannot mask the error.
        for f in (shortHFile, linkFile, tmpFile, shortHAlnFile):
            if os.path.exists(f):
                os.remove(f)

    if verbose > 0:
        print("END %s" % progName)
        sys.stdout.flush()

    return 0
|
|
102
|
|
103
|
|
if __name__ == "__main__":
    # Use the value returned by main() as the exit status of the process.
    sys.exit(main())
|