annotate commons/tools/dbSplit.py @ 19:9bcfa7936eec

Deleted selected files
author m-zytnicki
date Mon, 29 Apr 2013 03:23:29 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 ##@file
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 # Split the input fasta file in several output files
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 # usage: dbSplit.py [ options ]
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 # options:
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 # -h: this help
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9 # -i: name of the input file (format='fasta')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 # -n: number of sequences per output file (default=1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 # -d: record the output fasta files in a directory called 'batches'
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 # -s: use the sequence header if '-n 1' (otherwise 'batch_00X')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 # -p: use a prefix for the output files (default='batch')
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 # -v: verbose (default=0/1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
17 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
18 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
19
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
20 from commons.core.seq.FastaUtils import FastaUtils
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
21
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23 ## Give the list of the command-line options
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
24 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help():
    """Print the usage message and the list of command-line options on stdout.

    The message starts and ends with an empty line.
    """
    usageLines = (
        "",
        "usage: dbSplit.py [ options ]",
        "options:",
        " -h: this help",
        " -i: name of the input file (format='fasta')",
        " -n: number of sequences per batch file (default=1)",
        " -d: record the output fasta files in a directory called 'batches'",
        " -s: use the sequence header if '-n 1' (otherwise 'batch_00X')",
        " -p: use a prefix for the output files (default='batch')",
        " -v: verbosity level (default=0/1/2)",
        "",
    )
    for line in usageLines:
        # Single-argument call form: produces the same output whether it is
        # parsed as a Python 2 print statement or a Python 3 print() call.
        print(line)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
37
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
38
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
39 ## Split the input fasta file in several output files
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
40 #
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def main():
    """Parse the command line and hand the split over to FastaUtils.dbSplit().

    Options are read from sys.argv[1:] (see help() for the list).

    Returns:
        0 once FastaUtils.dbSplit() has returned.

    Exits (via sys.exit):
        0 after printing the help when '-h' is given;
        1 on an unknown option, a non-integer value for '-n' or '-v',
        or a missing '-i'.
    """
    import os  # local import: only needed to derive the program name below

    inFile = ""
    nbSeqPerBatch = 1
    newDir = False
    useSeqHeader = False
    prefix = "batch"
    verbose = 0

    try:
        opts, _args = getopt.getopt( sys.argv[1:], "hi:n:dsp:v:" )
    except getopt.GetoptError as err:
        # 'as' form is accepted by Python 2.6+ and Python 3 alike
        sys.stderr.write( "%s\n" % ( str(err) ) )
        help()
        sys.exit(1)

    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-i":
            inFile = a
        elif o in ( "-n", "-v" ):
            # Both options take an integer; report a bad value like any other
            # usage error instead of dying with a ValueError traceback.
            try:
                value = int(a)
            except ValueError:
                msg = "ERROR: option %s requires an integer (got '%s')" % ( o, a )
                sys.stderr.write( "%s\n" % ( msg ) )
                help()
                sys.exit(1)
            if o == "-n":
                nbSeqPerBatch = value
            else:
                verbose = value
        elif o == "-d":
            newDir = True
        elif o == "-s":
            useSeqHeader = True
        elif o == "-p":
            prefix = a

    if inFile == "":
        msg = "ERROR: missing input file (-i)"
        sys.stderr.write( "%s\n" % ( msg ) )
        help()
        sys.exit(1)

    # Name of the script without its directory, used in the START/END banners
    progName = os.path.basename( sys.argv[0] )

    if verbose > 0:
        print( "START %s" % ( progName ) )
        sys.stdout.flush()

    FastaUtils.dbSplit( inFile, nbSeqPerBatch, newDir, useSeqHeader, prefix, verbose )

    if verbose > 0:
        print( "END %s" % ( progName ) )
        sys.stdout.flush()

    return 0
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
89
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
90
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit( main() )