annotate tools/sr_mapping/bfast_wrapper.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 #!/usr/bin/env python
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 Runs BFAST on single-end or paired-end data.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 TODO: more documentation
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 TODO:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 - auto-detect gzip or bz2
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9 - split options (?)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 - queue lengths (?)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 - assumes reference always has been indexed
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 - main and secondary indexes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13 - scoring matrix file ?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 - read group file ?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 usage: bfast_wrapper.py [options]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 -r, --ref=r: The reference genome to use or index
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 -f, --fastq=f: The fastq file to use for the mapping
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19 -F, --output=u: The file to save the output (SAM format)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20 -s, --fileSource=s: Whether to use a previously indexed reference sequence or one from history (indexed or history)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 -p, --params=p: Parameter setting to use (pre_set or full)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22 -n, --numThreads=n: The number of threads to use
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23 -A, --space=A: The encoding space (0: base 1: color)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 -o, --offsets=o: The offsets for 'match'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 -l, --loadAllIndexes=l: Load all indexes into memory
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26 -k, --keySize=k: truncate key size in 'match'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 -K, --maxKeyMatches=K: the maximum number of matches to allow before a key is ignored
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28 -M, --maxNumMatches=M: the maximum number of matches to allow before the read is discarded
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29 -w, --whichStrand=w: the strands to consider (0: both 1: forward 2: reverse)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 -t, --timing=t: output timing information to stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31 -u, --ungapped=u: performed ungapped local alignment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 -U, --unconstrained=U: performed local alignment without mask constraints
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 -O, --offset=O: the number of bases before and after each hit to consider in local alignment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 -q, --avgMismatchQuality=q: average mismatch quality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 -a, --algorithm=a: post processing algorithm (0: no filtering, 1: all passing filters, 2: unique, 3: best scoring unique, 4: best score all)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 -P, --disallowPairing=P: do not choose alignments based on pairing
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 -R, --reverse=R: paired end reads are given on reverse strands
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 -z, --random=z: output a random best scoring alignment
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39 -D, --dbkey=D: Dbkey for reference genome
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 -H, --suppressHeader=H: Suppress the sam header
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 import optparse, os, shutil, subprocess, sys, tempfile
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 def stop_err( msg ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 sys.stderr.write( '%s\n' % msg )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 sys.exit()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49 def __main__():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 #Parse Command Line
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 parser = optparse.OptionParser()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to index and use' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 parser.add_option( '-f', '--fastq', dest='fastq', help='The fastq file to use for the mapping' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 parser.add_option( '-F', '--output', dest='output', help='The file to save the output (SAM format)' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 parser.add_option( '-A', '--space', dest='space', type="choice", default='0', choices=('0','1' ), help='The encoding space (0: base 1: color)' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 parser.add_option( '-H', '--suppressHeader', action="store_true", dest='suppressHeader', default=False, help='Suppress header' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 parser.add_option( '-n', '--numThreads', dest='numThreads', type="int", default="1", help='The number of threads to use' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 parser.add_option( '-t', '--timing', action="store_true", default=False, dest='timing', help='output timming information to stderr' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 parser.add_option( '-l', '--loadAllIndexes', action="store_true", default=False, dest='loadAllIndexes', help='Load all indexes into memory' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 parser.add_option( '-m', '--indexMask', dest='indexMask', help='String containing info on how to build custom indexes' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 parser.add_option( "-b", "--buildIndex", action="store_true", dest="buildIndex", default=False, help='String containing info on how to build custom indexes' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 parser.add_option( "--indexRepeatMasker", action="store_true", dest="indexRepeatMasker", default=False, help='Do not index lower case sequences. Such as those created by RepeatMasker' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 parser.add_option( '--indexContigOptions', dest='indexContigOptions', default="", help='The contig range options to use for the indexing' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 parser.add_option( '--indexExonsFileName', dest='indexExonsFileName', default="", help='The exons file to use for the indexing' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 parser.add_option( '-o', '--offsets', dest='offsets', default="", help='The offsets for \'match\'' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 parser.add_option( '-k', '--keySize', dest='keySize', type="int", default="-1", help='truncate key size in \'match\'' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 parser.add_option( '-K', '--maxKeyMatches', dest='maxKeyMatches', type="int", default="-1", help='the maximum number of matches to allow before a key is ignored' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 parser.add_option( '-M', '--maxNumMatches', dest='maxNumMatches', type="int", default="-1", help='the maximum number of matches to allow bfore the read is discarded' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 parser.add_option( '-w', '--whichStrand', dest='whichStrand', type="choice", default='0', choices=('0','1','2'), help='the strands to consider (0: both 1: forward 2: reverse)' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 parser.add_option( '--scoringMatrixFileName', dest='scoringMatrixFileName', help='Scoring Matrix file used to score the alignments' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 parser.add_option( '-u', '--ungapped', dest='ungapped', action="store_true", default=False, help='performed ungapped local alignment' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 parser.add_option( '-U', '--unconstrained', dest='unconstrained', action="store_true", default=False, help='performed local alignment without mask constraints' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 parser.add_option( '-O', '--offset', dest='offset', type="int", default="0", help='the number of bases before and after each hit to consider in local alignment' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 parser.add_option( '-q', '--avgMismatchQuality', type="int", default="-1", dest='avgMismatchQuality', help='average mismatch quality' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 parser.add_option( '-a', '--algorithm', dest='algorithm', default='0', type="choice", choices=('0','1','2','3','4' ), help='post processing algorithm (0: no filtering, 1: all passing filters, 2: unique, 3: best scoring unique, 4: best score all' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 parser.add_option( '--unpaired', dest='unpaired', action="store_true", default=False, help='do not choose alignments based on pairing' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 parser.add_option( '--reverseStrand', dest='reverseStrand', action="store_true", default=False, help='paired end reads are given on reverse strands' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 parser.add_option( '--pairedEndInfer', dest='pairedEndInfer', action="store_true", default=False, help='break ties when one end of a paired end read by estimating the insert size distribution' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 parser.add_option( '--randomBest', dest='randomBest', action="store_true", default=False, help='output a random best scoring alignment' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84 (options, args) = parser.parse_args()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 # output version # of tool
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 tmp = tempfile.NamedTemporaryFile().name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 tmp_stdout = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 proc = subprocess.Popen( args='bfast 2>&1', shell=True, stdout=tmp_stdout )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 tmp_stdout.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 stdout = None
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 for line in open( tmp_stdout.name, 'rb' ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 if line.lower().find( 'version' ) >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 stdout = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 if stdout:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 sys.stdout.write( '%s\n' % stdout )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 raise Exception
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 sys.stdout.write( 'Could not determine BFAST version\n' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 buffsize = 1048576
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 # make temp directory for bfast, requires trailing slash
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 tmp_dir = '%s/' % tempfile.mkdtemp()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 #'generic' options used in all bfast commands here
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 if options.timing:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 all_cmd_options = "-t"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 all_cmd_options = ""
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 if options.buildIndex:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118 reference_filepath = tempfile.NamedTemporaryFile( dir=tmp_dir, suffix='.fa' ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 #build bfast indexes
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 os.symlink( options.ref, reference_filepath )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 #bfast fast2brg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 nuc_space = [ "0" ]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 if options.space == "1":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 #color space localalign appears to require nuc space brg
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 nuc_space.append( "1" )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 for space in nuc_space:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 cmd = 'bfast fasta2brg -f "%s" -A "%s" %s' % ( reference_filepath, space, all_cmd_options )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 stderr = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 raise Exception, 'Error in \'bfast fasta2brg\'.\n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 #bfast index
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153 all_index_cmds = 'bfast index %s -f "%s" -A "%s" -n "%s"' % ( all_cmd_options, reference_filepath, options.space, options.numThreads )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 if options.indexRepeatMasker:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 all_index_cmds += " -R"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 if options.indexContigOptions:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 index_contig_options = map( int, options.indexContigOptions.split( ',' ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160 if index_contig_options[0] >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 all_index_cmds += ' -s "%s"' % index_contig_options[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 if index_contig_options[1] >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 all_index_cmds += ' -S "%s"' % index_contig_options[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 if index_contig_options[2] >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 all_index_cmds += ' -e "%s"' % index_contig_options[2]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 if index_contig_options[3] >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 all_index_cmds += ' -E "%s"' % index_contig_options[3]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 elif options.indexExonsFileName:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 all_index_cmds += ' -x "%s"' % options.indexExonsFileName
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 index_count = 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 for mask, hash_width in [ mask.split( ':' ) for mask in options.indexMask.split( ',' ) ]:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 cmd = '%s -m "%s" -w "%s" -i "%i"' % ( all_index_cmds, mask, hash_width, index_count )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 proc = subprocess.Popen( args=cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 stderr = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 index_count += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 raise Exception, 'Error in \'bfast index\'.\n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 reference_filepath = options.ref
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 assert reference_filepath and os.path.exists( reference_filepath ), 'A valid genome reference was not provided.'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 # set up aligning and generate aligning command options
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 # set up temp output files
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 tmp_bmf = tempfile.NamedTemporaryFile( dir=tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 tmp_bmf_name = tmp_bmf.name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 tmp_bmf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 tmp_baf = tempfile.NamedTemporaryFile( dir=tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 tmp_baf_name = tmp_baf.name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 tmp_baf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 bfast_match_cmd = 'bfast match -f "%s" -r "%s" -n "%s" -A "%s" -T "%s" -w "%s" %s' % ( reference_filepath, options.fastq, options.numThreads, options.space, tmp_dir, options.whichStrand, all_cmd_options )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 bfast_localalign_cmd = 'bfast localalign -f "%s" -m "%s" -n "%s" -A "%s" -o "%s" %s' % ( reference_filepath, tmp_bmf_name, options.numThreads, options.space, options.offset, all_cmd_options )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 bfast_postprocess_cmd = 'bfast postprocess -O 1 -f "%s" -i "%s" -n "%s" -A "%s" -a "%s" %s' % ( reference_filepath, tmp_baf_name, options.numThreads, options.space, options.algorithm, all_cmd_options )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 if options.offsets:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 bfast_match_cmd += ' -o "%s"' % options.offsets
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 if options.keySize >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216 bfast_match_cmd += ' -k "%s"' % options.keySize
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 if options.maxKeyMatches >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 bfast_match_cmd += ' -K "%s"' % options.maxKeyMatches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 if options.maxNumMatches >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 bfast_match_cmd += ' -M "%s"' % options.maxNumMatches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 bfast_localalign_cmd += ' -M "%s"' % options.maxNumMatches
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 if options.scoringMatrixFileName:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 bfast_localalign_cmd += ' -x "%s"' % options.scoringMatrixFileName
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 bfast_postprocess_cmd += ' -x "%s"' % options.scoringMatrixFileName
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 if options.ungapped:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226 bfast_localalign_cmd += ' -u'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 if options.unconstrained:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 bfast_localalign_cmd += ' -U'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 if options.avgMismatchQuality >= 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 bfast_localalign_cmd += ' -q "%s"' % options.avgMismatchQuality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 bfast_postprocess_cmd += ' -q "%s"' % options.avgMismatchQuality
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 if options.algorithm == 3:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 if options.pairedEndInfer:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 bfast_postprocess_cmd += ' -P'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 if options.randomBest:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 bfast_postprocess_cmd += ' -z'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 if options.unpaired:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 bfast_postprocess_cmd += ' -U'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 if options.reverseStrand:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 bfast_postprocess_cmd += ' -R'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 #instead of using temp files, should we stream through pipes?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 bfast_match_cmd += " > %s" % tmp_bmf_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 bfast_localalign_cmd += " > %s" % tmp_baf_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 bfast_postprocess_cmd += " > %s" % options.output
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 # need to nest try-except in try-finally to handle 2.4
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 # bfast 'match'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 proc = subprocess.Popen( args=bfast_match_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 stderr = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 raise Exception, 'Error in \'bfast match\'. \n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 # bfast 'localalign'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 proc = subprocess.Popen( args=bfast_localalign_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 stderr = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 raise Exception, 'Error in \'bfast localalign\'. \n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 # bfast 'postprocess'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 tmp = tempfile.NamedTemporaryFile( dir=tmp_dir ).name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 tmp_stderr = open( tmp, 'wb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 proc = subprocess.Popen( args=bfast_postprocess_cmd, shell=True, cwd=tmp_dir, stderr=tmp_stderr.fileno() )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 returncode = proc.wait()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 # get stderr, allowing for case where it's very large
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 tmp_stderr = open( tmp, 'rb' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 stderr = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 while True:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305 stderr += tmp_stderr.read( buffsize )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
306 if not stderr or len( stderr ) % buffsize != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
307 break
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
308 except OverflowError:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
309 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
310 tmp_stderr.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
311 if returncode != 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
312 raise Exception, stderr
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
313 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
314 raise Exception, 'Error in \'bfast postprocess\'. \n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
315 # remove header if necessary
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
316 if options.suppressHeader:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
317 tmp_out = tempfile.NamedTemporaryFile( dir=tmp_dir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
318 tmp_out_name = tmp_out.name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
319 tmp_out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
320 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
321 shutil.move( options.output, tmp_out_name )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
322 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
323 raise Exception, 'Error moving output file before removing headers. \n' + str( e )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
324 fout = file( options.output, 'w' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
325 for line in file( tmp_out.name, 'r' ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
326 if len( line ) < 3 or line[0:3] not in [ '@HD', '@SQ', '@RG', '@PG', '@CO' ]:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
327 fout.write( line )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
328 fout.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
329 # check that there are results in the output file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
330 if os.path.getsize( options.output ) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
331 if "0" == options.space:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
332 sys.stdout.write( 'BFAST run on Base Space data' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
333 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
334 sys.stdout.write( 'BFAST run on Color Space data' )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
335 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
336 raise Exception, 'The output file is empty. You may simply have no matches, or there may be an error with your input file or settings.'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
337 except Exception, e:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
338 stop_err( 'The alignment failed.\n' + str( e ) )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
339 finally:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
340 # clean up temp dir
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
341 if os.path.exists( tmp_dir ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
342 shutil.rmtree( tmp_dir )
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
343
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
344 if __name__=="__main__": __main__()