annotate enhanced_bowtie_wrapper.py @ 31:85becd29b52c draft

Uploaded
author kaymccoy
date Wed, 10 Aug 2016 12:24:08 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
31
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
1 #!/usr/bin/env python
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
2
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
3 """
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
4 Runs Bowtie on single-end or paired-end data.
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
5 For use with Bowtie v. 0.12.7
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
6
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
7 usage: bowtie_wrapper.py [options]
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
8 -t, --threads=t: The number of threads to run
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
9 -o, --output=o: The output file
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
19 -2, --paired=2: Whether the data is single- or paired-end
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
20 -g, --genomeSource=g: The type of reference provided
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
21 -r, --ref=r: The reference genome to use or index
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
22 -s, --skip=s: Skip the first n reads
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
23 -a, --alignLimit=a: Only align the first n reads
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
28 -l, --seedLen=l: Seed length
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
44 -F, --offrate=F: Override the offrate of the index to n
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
45 -8, --snpphred=8: SNP penalty on Phred scale
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
48 -S, --seed=S: Seed for pseudo-random number generator
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
49 -C, --params=C: Whether to use default or specified parameters
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
50 -u, --iautoB=u: Automatic or specified behavior
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
54 -D, --idcv=D: The period for the difference-cover sample
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
62 -x, --indexSettings=x: Whether or not indexing options are to be set
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
63 -H, --suppressHeader=H: Suppress header
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
65 """
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
66
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
67 import optparse, os, shutil, subprocess, sys, tempfile
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
68
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
69 #Allow more than Sanger encoded variants
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
73
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
74 def stop_err( msg ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
75 sys.stderr.write( '%s\n' % msg )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
76 sys.exit()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
77
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
78 def __main__():
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
79 #Parse Command Line
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
80 parser = optparse.OptionParser()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ or FASTA format' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
91
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
92
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
93
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
94 parser.add_option( '--filetype', dest='filetype', help='The filetype of your input reads - FASTA (f) or FASTQ (q)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
95 parser.add_option( '--outtype', dest='outtype', help='The filetype of your output (nothing for map or -S for SAM)' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
96
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
97
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
98 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
99 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
100 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
101 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
102 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
103 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
104 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
105 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
106 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
107 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
108 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
109 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
110 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
111 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
112 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
113 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
114 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
115 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
116 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
117 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
118 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
119 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
120 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
121 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
122 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
123 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
124 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
125 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
126 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
127 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
128 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
129 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
130 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
131 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
132 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
133 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
134 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
135 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
136 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
137 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
138 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
139 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
140 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
141 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
142 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
143 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
144 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
145 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
146 (options, args) = parser.parse_args()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
147 if options.mismatchSeed and options.maxMismatches:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
148 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
149 stdout = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
150
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
151 # make temp directory for placement of indices and copy reference file there if necessary
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
152 tmp_index_dir = tempfile.mkdtemp()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
153 # get type of data (solid or solexa)
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
154 if options.dataType == 'solid':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
155 colorspace = '-C'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
156 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
157 colorspace = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
158 # index if necessary
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
159 if options.genomeSource == 'history' and not options.do_not_build_index:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
160 # set up commands
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
161 if options.index_settings =='indexPreSet':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
162 indexing_cmds = '%s' % colorspace
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
163 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
164 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
165 if options.iautoB and options.iautoB == 'set':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
166 iautoB = '--noauto'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
167 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
168 iautoB = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
169 if options.ipacked and options.ipacked == 'packed':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
170 ipacked = '--packed'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
171 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
172 ipacked = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
173 if options.ibmax and int( options.ibmax ) >= 1:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
174 ibmax = '--bmax %s' % options.ibmax
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
175 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
176 ibmax = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
177 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
178 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
179 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
180 ibmaxdivn = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
181 if options.idcv and int( options.idcv ) >= 3:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
182 idcv = '--dcv %s' % options.idcv
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
183 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
184 idcv = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
185 if options.inodc and options.inodc == 'nodc':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
186 inodc = '--nodc'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
187 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
188 inodc = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
189 if options.inoref and options.inoref == 'noref':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
190 inoref = '--noref'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
191 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
192 inoref = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
193 if options.iftab and int( options.iftab ) >= 1:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
194 iftab = '--ftabchars %s' % options.iftab
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
195 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
196 iftab = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
197 if options.intoa and options.intoa == 'yes':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
198 intoa = '--ntoa'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
199 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
200 intoa = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
201 if options.iendian and options.iendian == 'big':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
202 iendian = '--big'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
203 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
204 iendian = '--little'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
205 if options.iseed and int( options.iseed ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
206 iseed = '--seed %s' % options.iseed
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
207 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
208 iseed = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
209 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
210 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
211 inoref, options.ioffrate, iftab, intoa, iendian,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
212 iseed, colorspace )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
213 except ValueError, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
214 # clean up temp dir
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
215 if os.path.exists( tmp_index_dir ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
216 shutil.rmtree( tmp_index_dir )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
217 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
218 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
219 ref_file_name = ref_file.name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
220 ref_file.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
221 os.symlink( options.ref, ref_file_name )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
222 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
223 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
224 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
225 tmp_stderr = open( tmp, 'wb' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
226 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
227 returncode = proc.wait()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
228 tmp_stderr.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
229 # get stderr, allowing for case where it's very large
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
230 tmp_stderr = open( tmp, 'rb' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
231 stderr = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
232 buffsize = 1048576
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
233 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
234 while True:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
235 stderr += tmp_stderr.read( buffsize )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
236 if not stderr or len( stderr ) % buffsize != 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
237 break
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
238 except OverflowError:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
239 pass
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
240 tmp_stderr.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
241 if returncode != 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
242 raise Exception, stderr
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
243 except Exception, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
244 # clean up temp dir
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
245 if os.path.exists( tmp_index_dir ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
246 shutil.rmtree( tmp_index_dir )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
247 stop_err( 'Error indexing reference sequence\n' + str( e ) )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
248 stdout += 'File indexed. '
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
249 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
250 ref_file_name = options.ref
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
251 # set up aligning and generate aligning command options
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
252 # automatically set threads in both cases
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
253 tmp_suppressed_file_name = None
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
254 tmp_unmapped_file_name = None
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
255 if options.suppressHeader == 'true':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
256 suppressHeader = '--sam-nohead'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
257 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
258 suppressHeader = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
259 if options.maxInsert and int( options.maxInsert ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
260 maxInsert = '-X %s' % options.maxInsert
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
261 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
262 maxInsert = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
263 if options.mateOrient:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
264 mateOrient = '--%s' % options.mateOrient
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
265 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
266 mateOrient = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
267 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
268 if options.params == 'preSet':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
269 aligning_cmds = '-f %s %s -p %s -S %s %s %s ' % \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
270 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
271
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
272 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
273 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
274 if options.skip and int( options.skip ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
275 skip = '-s %s' % options.skip
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
276 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
277 skip = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
278 if options.alignLimit and int( options.alignLimit ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
279 alignLimit = '-u %s' % options.alignLimit
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
280 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
281 alignLimit = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
282 if options.trimH and int( options.trimH ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
283 trimH = '-5 %s' % options.trimH
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
284 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
285 trimH = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
286 if options.trimL and int( options.trimL ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
287 trimL = '-3 %s' % options.trimL
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
288 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
289 trimL = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
290 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
291 or options.maxMismatches == '2' or options.maxMismatches == '3'):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
292 maxMismatches = '-v %s' % options.maxMismatches
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
293 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
294 maxMismatches = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
295 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
296 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
297 mismatchSeed = '-n %s' % options.mismatchSeed
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
298 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
299 mismatchSeed = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
300 if options.mismatchQual and int( options.mismatchQual ) >= 1:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
301 mismatchQual = '-e %s' % options.mismatchQual
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
302 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
303 mismatchQual = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
304 if options.seedLen and int( options.seedLen ) >= 5:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
305 seedLen = '-l %s' % options.seedLen
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
306 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
307 seedLen = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
308 if options.rounding == 'noRound':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
309 rounding = '--nomaqround'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
310 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
311 rounding = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
312 if options.minInsert and int( options.minInsert ) > 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
313 minInsert = '-I %s' % options.minInsert
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
314 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
315 minInsert = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
316 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
317 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
318 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
319 maxAlignAttempt = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
320 if options.forwardAlign == 'noForward':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
321 forwardAlign = '--nofw'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
322 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
323 forwardAlign = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
324 if options.reverseAlign == 'noReverse':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
325 reverseAlign = '--norc'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
326 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
327 reverseAlign = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
328 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
329 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
330 maxBacktracks = '--maxbts %s' % options.maxBacktracks
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
331 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
332 maxBacktracks = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
333 if options.tryHard == 'doTryHard':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
334 tryHard = '-y'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
335 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
336 tryHard = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
337 if options.valAlign and int( options.valAlign ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
338 valAlign = '-k %s' % options.valAlign
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
339 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
340 valAlign = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
341 if options.allValAligns == 'doAllValAligns':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
342 allValAligns = '-a'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
343 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
344 allValAligns = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
345 if options.suppressAlign and int( options.suppressAlign ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
346 suppressAlign = '-m %s' % options.suppressAlign
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
347 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
348 suppressAlign = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
349 if options.best == 'doBest':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
350 best = '--best'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
351 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
352 best = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
353 if options.strata == 'doStrata':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
354 strata = '--strata'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
355 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
356 strata = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
357 if options.offrate and int( options.offrate ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
358 offrate = '-o %s' % options.offrate
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
359 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
360 offrate = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
361 if options.seed and int( options.seed ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
362 seed = '--seed %s' % options.seed
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
363 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
364 seed = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
365 if options.paired == 'paired':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
366 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
367 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
368 tmp_unmapped_file_name = tmp_unmapped_file.name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
369 tmp_unmapped_file.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
370 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
371 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
372 output_unmapped_reads = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
373 if options.output_suppressed_reads:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
374 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
375 tmp_suppressed_file_name = tmp_suppressed_file.name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
376 tmp_suppressed_file.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
377 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
378 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
379 output_suppressed_reads = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
380 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
381 if options.output_unmapped_reads:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
382 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
383 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
384 output_unmapped_reads = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
385 if options.output_suppressed_reads:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
386 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
387 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
388 output_suppressed_reads = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
389 snpfrac = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
390 if options.snpphred and int( options.snpphred ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
391 snpphred = '--snpphred %s' % options.snpphred
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
392 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
393 snpphred = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
394 if options.snpfrac and float( options.snpfrac ) >= 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
395 snpfrac = '--snpfrac %s' % options.snpfrac
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
396 if options.keepends and options.keepends == 'doKeepends':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
397 keepends = '--col-keepends'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
398 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
399 keepends = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
400
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
401
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
402
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
403
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
404
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
405 if options.filetype == 'f':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
406 filetype = '-f'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
407 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
408 filetype = '-q'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
409
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
410 if options.outtype == 'S':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
411 outtype = '-S'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
412 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
413 outtype = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
414
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
415
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
416 aligning_cmds = '%s %s %s -p %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
417 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
418 ( filetype, maxInsert, mateOrient, options.threads, outtype, suppressHeader,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
419 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
420 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
421 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
422 tryHard, valAlign, allValAligns, suppressAlign, best,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
423 strata, offrate, seed, snpphred, snpfrac, keepends,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
424 output_unmapped_reads, output_suppressed_reads,
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
425 quality_score_encoding )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
426
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
427
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
428
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
429
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
430
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
431
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
432 except ValueError, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
433 # clean up temp dir
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
434 if os.path.exists( tmp_index_dir ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
435 shutil.rmtree( tmp_index_dir )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
436 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
437 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
438 # have to nest try-except in try-finally to handle 2.4
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
439 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
440 # prepare actual mapping commands
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
441 if options.paired == 'paired':
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
442 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
443 else:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
444 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
445 # align
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
446 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
447 tmp_stderr = open( tmp, 'wb' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
448 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
449 returncode = proc.wait()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
450 tmp_stderr.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
451 # get stderr, allowing for case where it's very large
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
452 tmp_stderr = open( tmp, 'rb' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
453 stderr = ''
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
454 buffsize = 1048576
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
455 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
456 while True:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
457 stderr += tmp_stderr.read( buffsize )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
458 if not stderr or len( stderr ) % buffsize != 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
459 break
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
460 except OverflowError:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
461 pass
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
462 tmp_stderr.close()
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
463 if returncode != 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
464 raise Exception, stderr
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
465 # get suppressed and unmapped reads output files in place if appropriate
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
466 if options.paired == 'paired' and tmp_suppressed_file_name and \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
467 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
468 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
469 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
470 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
471 shutil.move( left, options.output_suppressed_reads_l )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
472 shutil.move( right, options.output_suppressed_reads_r )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
473 except Exception, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
474 sys.stdout.write( 'Error producing the suppressed output file.\n' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
475 if options.paired == 'paired' and tmp_unmapped_file_name and \
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
476 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
477 try:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
478 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
479 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
480 shutil.move( left, options.output_unmapped_reads_l )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
481 shutil.move( right, options.output_unmapped_reads_r )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
482 except Exception, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
483 sys.stdout.write( 'Error producing the unmapped output file.\n' )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
484 # check that there are results in the output file
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
485 if os.path.getsize( options.output ) == 0:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
486 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
487 except Exception, e:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
488 stop_err( 'Error aligning sequence. ' + str( e ) )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
489 finally:
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
490 # clean up temp dir
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
491 if os.path.exists( tmp_index_dir ):
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
492 shutil.rmtree( tmp_index_dir )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
493 stdout += 'Sequence file aligned.\n'
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
494 sys.stdout.write( stdout )
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
495
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
496 if __name__ == "__main__":
85becd29b52c Uploaded
kaymccoy
parents:
diff changeset
497 __main__()