18
|
1 #!/usr/bin/env python
|
|
2
|
|
3
|
|
4 ##@file
|
|
5 # Split the input fasta file in several output files
|
|
6 # usage: dbSplit.py [ options ]
|
|
7 # options:
|
|
8 # -h: this help
|
|
9 # -i: name of the input file (format='fasta')
|
|
10 # -n: number of sequences per output file (default=1)
|
|
11 # -d: record the output fasta files in a directory called 'batches'
|
|
12 # -s: use the sequence header if '-n 1' (otherwise 'batch_00X')
|
|
13 # -p: use a prefix for the output files (default='batch')
|
|
14 # -v: verbose (default=0/1)
|
|
15
|
|
16
|
|
17 import sys
|
|
18 import getopt
|
|
19
|
|
20 from commons.core.seq.FastaUtils import FastaUtils
|
|
21
|
|
22
|
|
23 ## Give the list of the command-line options
|
|
24 #
|
|
def help():
    """Print the command-line usage and the list of options on stdout.

    The text is framed by one empty line before and one empty line after.
    Every line goes through a single-argument ``print(...)`` call, which
    produces the same output under Python 2 (print statement applied to a
    parenthesised expression) and Python 3 (print function).
    """
    usageLines = (
        "",
        "usage: dbSplit.py [ options ]",
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        " -n: number of sequences per batch file (default=1)",
        " -d: record the output fasta files in a directory called 'batches'",
        " -s: use the sequence header if '-n 1' (otherwise 'batch_00X')",
        " -p: use a prefix for the output files (default='batch')",
        " -v: verbosity level (default=0/1/2)",
        "",
    )
    for line in usageLines:
        # print("") emits an empty line on both Python 2 and 3, whereas a
        # bare `print` is a no-op expression on Python 3.
        print(line)
|
|
37
|
|
38
|
|
39 ## Split the input fasta file in several output files
|
|
40 #
|
|
def main():
    """Parse the command line and hand the work over to FastaUtils.dbSplit.

    Options are read from ``sys.argv[1:]`` with ``getopt``; the values that
    are collected (input file, number of sequences per batch, directory
    flag, header flag, prefix, verbosity) are passed unchanged, in that
    order, to ``FastaUtils.dbSplit``.

    The process exits with status 1, after writing the error on stderr and
    printing the usage, when an option is not recognised, when the value
    given to '-n' or '-v' is not an integer, or when '-i' is missing.
    It exits with status 0 right after printing the usage when '-h' is given.

    @return 0 once FastaUtils.dbSplit has returned
    """
    inFile = ""
    nbSeqPerBatch = 1
    newDir = False
    useSeqHeader = False
    prefix = "batch"
    verbose = 0

    try:
        opts, _unusedArgs = getopt.getopt( sys.argv[1:], "hi:n:dsp:v:" )
    except getopt.GetoptError as err:
        # 'as' form of the except clause: valid on Python 2.6+ and Python 3.
        sys.stderr.write( "%s\n" % ( str(err) ) )
        help()
        sys.exit(1)

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFile = a
        elif o in ( "-n", "-v" ):
            # Both options take an integer; report a bad value the same way
            # as the other usage errors instead of dying with a traceback.
            try:
                value = int(a)
            except ValueError:
                msg = "ERROR: option %s requires an integer (got '%s')" % ( o, a )
                sys.stderr.write( "%s\n" % ( msg ) )
                help()
                sys.exit(1)
            if o == "-n":
                nbSeqPerBatch = value
            else:
                verbose = value
        elif o == "-d":
            newDir = True
        elif o == "-s":
            useSeqHeader = True
        elif o == "-p":
            prefix = a

    if inFile == "":
        msg = "ERROR: missing input file (-i)"
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)

    # Name of the script without its leading path, used in the START/END traces.
    progName = sys.argv[0].split("/")[-1]

    if verbose > 0:
        print( "START %s" % ( progName ) )
        sys.stdout.flush()

    FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0
|
|
89
|
|
90
|
|
if __name__ == "__main__":
    # Use the value returned by main() as the exit status of the process
    # instead of discarding it.
    sys.exit( main() )
|