annotate enhanced_bowtie_wrapper 1.0.0.py @ 10:a12f1a455df7 draft

Uploaded
author kaymccoy
date Sun, 07 Aug 2016 22:30:47 -0400
parents 5b41acce238e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
1 #!/usr/bin/env python
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
2
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
3 """
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
4 Runs Bowtie on single-end or paired-end data.
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
5 For use with Bowtie v. 0.12.7
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
6
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
7 usage: bowtie_wrapper.py [options]
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
8 -t, --threads=t: The number of threads to run
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
9 -o, --output=o: The output file
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
10 --output_unmapped_reads=: File name for unmapped reads (single-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
11 --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
12 --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
13 --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
14 --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
15 --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
16 -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
17 -I, --input2=I: The reverse reads file in Sanger FASTQ format
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
18 -4, --dataType=4: The type of data (SOLiD or Solexa)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
19 -2, --paired=2: Whether the data is single- or paired-end
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
20 -g, --genomeSource=g: The type of reference provided
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
21 -r, --ref=r: The reference genome to use or index
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
22 -s, --skip=s: Skip the first n reads
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
23 -a, --alignLimit=a: Only align the first n reads
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
24 -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
25 -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
26 -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
27 -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
28 -l, --seedLen=l: Seed length
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
29 -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
30 -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
31 -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
32 -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
33 -v, --valAlign=v: Report up to n valid alignments per read or pair
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
34 -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
35 -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
36 -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
37 -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
38 -j, --minInsert=j: Minimum insert size for valid paired-end alignments
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
39 -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
40 -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
41 -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
42 -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
43 -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
44 -F, --offrate=F: Override the offrate of the index to n
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
45 -8, --snpphred=8: SNP penalty on Phred scale
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
46 -6, --snpfrac=6: Fraction of sites expected to be SNP sites
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
47 -7, --keepends=7: Keep extreme-end nucleotides and qualities
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
48 -S, --seed=S: Seed for pseudo-random number generator
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
49 -C, --params=C: Whether to use default or specified parameters
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
50 -u, --iautoB=u: Automatic or specified behavior
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
51 -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
52 -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
53 -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
54 -D, --idcv=D: The period for the difference-cover sample
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
55 -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
56 -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
57 -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
58 -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
59 -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
60 -N, --iendian=N: Endianness to use when serializing integers to the index file
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
61 -Z, --iseed=Z: Seed for the pseudorandom number generator
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
62 -x, --indexSettings=x: Whether or not indexing options are to be set
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
63 -H, --suppressHeader=H: Suppress header
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
64 --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
65 """
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
66
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
67 import optparse, os, shutil, subprocess, sys, tempfile
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
68
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
69 #Allow more than Sanger encoded variants
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
70 DEFAULT_ASCII_ENCODING = '--phred33-quals'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
71 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
72 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
73
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
74 def stop_err( msg ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
75 sys.stderr.write( '%s\n' % msg )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
76 sys.exit()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
77
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
78 def __main__():
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
79 #Parse Command Line
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
80 parser = optparse.OptionParser()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
81 parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
82 parser.add_option( '-o', '--output', dest='output', help='The output file' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
83 parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
84 parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
85 parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
86 parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
87 parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
88 parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
89 parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
90 parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ or FASTA format' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
91
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
92
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
93
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
94 parser.add_option( '--filetype', dest='filetype', help='The filetype of your input reads - FASTA (f) or FASTQ (q)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
95 parser.add_option( '--outtype', dest='outtype', help='The filetype of your output (nothing for map or -S for SAM)' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
96
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
97
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
98 parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
99 parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
100 parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
101 parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
102 parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
103 parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
104 parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
105 parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
106 parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
107 parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
108 parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
109 parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
110 parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
111 parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
112 parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
113 parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
114 parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
115 parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
116 parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
117 parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
118 parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
119 parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
120 parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
121 parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
122 parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
123 parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
124 parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
125 parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
126 parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
127 parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
128 parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
129 parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
130 parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
131 parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
132 parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
133 parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
134 parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
135 parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
136 parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
137 parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
138 parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
139 parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
140 parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
141 parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
142 parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
143 parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
144 parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
145 parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
146 (options, args) = parser.parse_args()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
147 if options.mismatchSeed and options.maxMismatches:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
148 parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
149 stdout = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
150
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
151 # make temp directory for placement of indices and copy reference file there if necessary
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
152 tmp_index_dir = tempfile.mkdtemp()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
153 # get type of data (solid or solexa)
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
154 if options.dataType == 'solid':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
155 colorspace = '-C'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
156 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
157 colorspace = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
158 # index if necessary
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
159 if options.genomeSource == 'history' and not options.do_not_build_index:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
160 # set up commands
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
161 if options.index_settings =='indexPreSet':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
162 indexing_cmds = '%s' % colorspace
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
163 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
164 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
165 if options.iautoB and options.iautoB == 'set':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
166 iautoB = '--noauto'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
167 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
168 iautoB = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
169 if options.ipacked and options.ipacked == 'packed':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
170 ipacked = '--packed'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
171 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
172 ipacked = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
173 if options.ibmax and int( options.ibmax ) >= 1:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
174 ibmax = '--bmax %s' % options.ibmax
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
175 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
176 ibmax = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
177 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
178 ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
179 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
180 ibmaxdivn = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
181 if options.idcv and int( options.idcv ) >= 3:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
182 idcv = '--dcv %s' % options.idcv
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
183 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
184 idcv = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
185 if options.inodc and options.inodc == 'nodc':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
186 inodc = '--nodc'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
187 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
188 inodc = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
189 if options.inoref and options.inoref == 'noref':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
190 inoref = '--noref'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
191 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
192 inoref = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
193 if options.iftab and int( options.iftab ) >= 1:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
194 iftab = '--ftabchars %s' % options.iftab
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
195 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
196 iftab = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
197 if options.intoa and options.intoa == 'yes':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
198 intoa = '--ntoa'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
199 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
200 intoa = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
201 if options.iendian and options.iendian == 'big':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
202 iendian = '--big'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
203 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
204 iendian = '--little'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
205 if options.iseed and int( options.iseed ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
206 iseed = '--seed %s' % options.iseed
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
207 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
208 iseed = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
209 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
210 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
211 inoref, options.ioffrate, iftab, intoa, iendian,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
212 iseed, colorspace )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
213 except ValueError, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
214 # clean up temp dir
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
215 if os.path.exists( tmp_index_dir ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
216 shutil.rmtree( tmp_index_dir )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
217 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
218 ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
219 ref_file_name = ref_file.name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
220 ref_file.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
221 os.symlink( options.ref, ref_file_name )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
222 cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
223 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
224 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
225 tmp_stderr = open( tmp, 'wb' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
226 proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
227 returncode = proc.wait()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
228 tmp_stderr.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
229 # get stderr, allowing for case where it's very large
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
230 tmp_stderr = open( tmp, 'rb' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
231 stderr = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
232 buffsize = 1048576
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
233 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
234 while True:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
235 stderr += tmp_stderr.read( buffsize )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
236 if not stderr or len( stderr ) % buffsize != 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
237 break
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
238 except OverflowError:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
239 pass
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
240 tmp_stderr.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
241 if returncode != 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
242 raise Exception, stderr
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
243 except Exception, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
244 # clean up temp dir
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
245 if os.path.exists( tmp_index_dir ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
246 shutil.rmtree( tmp_index_dir )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
247 stop_err( 'Error indexing reference sequence\n' + str( e ) )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
248 stdout += 'File indexed. '
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
249 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
250 ref_file_name = options.ref
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
251 # set up aligning and generate aligning command options
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
252 # automatically set threads in both cases
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
253 tmp_suppressed_file_name = None
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
254 tmp_unmapped_file_name = None
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
255 if options.suppressHeader == 'true':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
256 suppressHeader = '--sam-nohead'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
257 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
258 suppressHeader = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
259 if options.maxInsert and int( options.maxInsert ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
260 maxInsert = '-X %s' % options.maxInsert
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
261 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
262 maxInsert = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
263 if options.mateOrient:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
264 mateOrient = '--%s' % options.mateOrient
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
265 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
266 mateOrient = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
267 quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
268 if options.params == 'preSet':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
269 aligning_cmds = '-f %s %s -p %s -S %s %s %s ' % \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
270 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
271
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
272 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
273 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
274 if options.skip and int( options.skip ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
275 skip = '-s %s' % options.skip
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
276 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
277 skip = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
278 if options.alignLimit and int( options.alignLimit ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
279 alignLimit = '-u %s' % options.alignLimit
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
280 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
281 alignLimit = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
282 if options.trimH and int( options.trimH ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
283 trimH = '-5 %s' % options.trimH
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
284 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
285 trimH = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
286 if options.trimL and int( options.trimL ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
287 trimL = '-3 %s' % options.trimL
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
288 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
289 trimL = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
290 if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
291 or options.maxMismatches == '2' or options.maxMismatches == '3'):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
292 maxMismatches = '-v %s' % options.maxMismatches
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
293 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
294 maxMismatches = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
295 if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
296 or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
297 mismatchSeed = '-n %s' % options.mismatchSeed
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
298 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
299 mismatchSeed = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
300 if options.mismatchQual and int( options.mismatchQual ) >= 1:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
301 mismatchQual = '-e %s' % options.mismatchQual
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
302 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
303 mismatchQual = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
304 if options.seedLen and int( options.seedLen ) >= 5:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
305 seedLen = '-l %s' % options.seedLen
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
306 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
307 seedLen = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
308 if options.rounding == 'noRound':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
309 rounding = '--nomaqround'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
310 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
311 rounding = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
312 if options.minInsert and int( options.minInsert ) > 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
313 minInsert = '-I %s' % options.minInsert
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
314 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
315 minInsert = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
316 if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
317 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
318 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
319 maxAlignAttempt = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
320 if options.forwardAlign == 'noForward':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
321 forwardAlign = '--nofw'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
322 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
323 forwardAlign = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
324 if options.reverseAlign == 'noReverse':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
325 reverseAlign = '--norc'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
326 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
327 reverseAlign = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
328 if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
329 ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
330 maxBacktracks = '--maxbts %s' % options.maxBacktracks
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
331 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
332 maxBacktracks = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
333 if options.tryHard == 'doTryHard':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
334 tryHard = '-y'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
335 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
336 tryHard = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
337 if options.valAlign and int( options.valAlign ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
338 valAlign = '-k %s' % options.valAlign
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
339 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
340 valAlign = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
341 if options.allValAligns == 'doAllValAligns':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
342 allValAligns = '-a'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
343 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
344 allValAligns = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
345 if options.suppressAlign and int( options.suppressAlign ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
346 suppressAlign = '-m %s' % options.suppressAlign
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
347 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
348 suppressAlign = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
349 if options.best == 'doBest':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
350 best = '--best'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
351 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
352 best = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
353 if options.strata == 'doStrata':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
354 strata = '--strata'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
355 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
356 strata = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
357 if options.offrate and int( options.offrate ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
358 offrate = '-o %s' % options.offrate
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
359 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
360 offrate = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
361 if options.seed and int( options.seed ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
362 seed = '--seed %s' % options.seed
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
363 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
364 seed = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
365 if options.paired == 'paired':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
366 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
367 tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
368 tmp_unmapped_file_name = tmp_unmapped_file.name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
369 tmp_unmapped_file.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
370 output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
371 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
372 output_unmapped_reads = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
373 if options.output_suppressed_reads:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
374 tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
375 tmp_suppressed_file_name = tmp_suppressed_file.name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
376 tmp_suppressed_file.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
377 output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
378 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
379 output_suppressed_reads = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
380 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
381 if options.output_unmapped_reads:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
382 output_unmapped_reads = '--un %s' % options.output_unmapped_reads
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
383 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
384 output_unmapped_reads = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
385 if options.output_suppressed_reads:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
386 output_suppressed_reads = '--max %s' % options.output_suppressed_reads
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
387 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
388 output_suppressed_reads = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
389 snpfrac = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
390 if options.snpphred and int( options.snpphred ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
391 snpphred = '--snpphred %s' % options.snpphred
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
392 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
393 snpphred = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
394 if options.snpfrac and float( options.snpfrac ) >= 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
395 snpfrac = '--snpfrac %s' % options.snpfrac
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
396 if options.keepends and options.keepends == 'doKeepends':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
397 keepends = '--col-keepends'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
398 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
399 keepends = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
400
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
401
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
402
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
403
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
404
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
405 if options.filetype == 'f':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
406 filetype = '-f'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
407 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
408 filetype = '-q'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
409
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
410 if options.outtype == 'S':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
411 outtype = '-S'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
412 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
413 outtype = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
414
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
415
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
416 aligning_cmds = '%s %s %s -p %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
417 '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
418 ( filetype, maxInsert, mateOrient, options.threads, outtype, suppressHeader,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
419 colorspace, skip, alignLimit, trimH, trimL, maxMismatches,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
420 mismatchSeed, mismatchQual, seedLen, rounding, minInsert,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
421 maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
422 tryHard, valAlign, allValAligns, suppressAlign, best,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
423 strata, offrate, seed, snpphred, snpfrac, keepends,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
424 output_unmapped_reads, output_suppressed_reads,
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
425 quality_score_encoding )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
426
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
427
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
428
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
429
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
430
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
431
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
432 except ValueError, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
433 # clean up temp dir
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
434 if os.path.exists( tmp_index_dir ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
435 shutil.rmtree( tmp_index_dir )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
436 stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
437 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
438 # have to nest try-except in try-finally to handle 2.4
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
439 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
440 # prepare actual mapping commands
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
441 if options.paired == 'paired':
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
442 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
443 else:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
444 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
445 # align
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
446 tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
447 tmp_stderr = open( tmp, 'wb' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
448 proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
449 returncode = proc.wait()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
450 tmp_stderr.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
451 # get stderr, allowing for case where it's very large
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
452 tmp_stderr = open( tmp, 'rb' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
453 stderr = ''
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
454 buffsize = 1048576
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
455 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
456 while True:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
457 stderr += tmp_stderr.read( buffsize )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
458 if not stderr or len( stderr ) % buffsize != 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
459 break
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
460 except OverflowError:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
461 pass
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
462 tmp_stderr.close()
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
463 if returncode != 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
464 raise Exception, stderr
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
465 # get suppressed and unmapped reads output files in place if appropriate
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
466 if options.paired == 'paired' and tmp_suppressed_file_name and \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
467 options.output_suppressed_reads_l and options.output_suppressed_reads_r:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
468 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
469 left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
470 right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
471 shutil.move( left, options.output_suppressed_reads_l )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
472 shutil.move( right, options.output_suppressed_reads_r )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
473 except Exception, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
474 sys.stdout.write( 'Error producing the suppressed output file.\n' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
475 if options.paired == 'paired' and tmp_unmapped_file_name and \
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
476 options.output_unmapped_reads_l and options.output_unmapped_reads_r:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
477 try:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
478 left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
479 right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
480 shutil.move( left, options.output_unmapped_reads_l )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
481 shutil.move( right, options.output_unmapped_reads_r )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
482 except Exception, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
483 sys.stdout.write( 'Error producing the unmapped output file.\n' )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
484 # check that there are results in the output file
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
485 if os.path.getsize( options.output ) == 0:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
486 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
487 except Exception, e:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
488 stop_err( 'Error aligning sequence. ' + str( e ) )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
489 finally:
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
490 # clean up temp dir
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
491 if os.path.exists( tmp_index_dir ):
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
492 shutil.rmtree( tmp_index_dir )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
493 stdout += 'Sequence file aligned.\n'
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
494 sys.stdout.write( stdout )
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
495
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
496 if __name__ == "__main__":
5b41acce238e Uploaded
kaymccoy
parents:
diff changeset
497 __main__()