| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 
 | 
| 
 | 
     3 """
 | 
| 
 | 
     4 Runs Bowtie on single-end or paired-end data.
 | 
| 
 | 
     5 For use with Bowtie v. 0.12.7
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 usage: bowtie_wrapper.py [options]
 | 
| 
 | 
     8     -t, --threads=t: The number of threads to run
 | 
| 
 | 
     9     -o, --output=o: The output file
 | 
| 
 | 
    10     --output_unmapped_reads=: File name for unmapped reads (single-end)
 | 
| 
 | 
    11     --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
 | 
| 
 | 
    12     --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
 | 
| 
 | 
    13     --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
 | 
| 
 | 
    14     --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
 | 
| 
 | 
    15     --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
 | 
| 
 | 
    16     -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
 | 
| 
 | 
    17     -I, --input2=I: The reverse reads file in Sanger FASTQ format
 | 
| 
 | 
    18     -4, --dataType=4: The type of data (SOLiD or Solexa)
 | 
| 
 | 
    19     -2, --paired=2: Whether the data is single- or paired-end
 | 
| 
 | 
    20     -g, --genomeSource=g: The type of reference provided
 | 
| 
 | 
    21     -r, --ref=r: The reference genome to use or index
 | 
| 
 | 
    22     -s, --skip=s: Skip the first n reads
 | 
| 
 | 
    23     -a, --alignLimit=a: Only align the first n reads
 | 
| 
 | 
    24     -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
 | 
| 
 | 
    25     -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
 | 
| 
 | 
    26     -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
 | 
| 
 | 
    27     -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
 | 
| 
 | 
    28     -l, --seedLen=l: Seed length
 | 
| 
 | 
    29     -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
 | 
| 
 | 
    30     -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy
 | 
| 
 | 
    31     -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist
 | 
| 
 | 
    32     -v, --valAlign=v: Report up to n valid arguments per read
 | 
| 
 | 
    33     -V, --allValAligns=V: Whether or not to report all valid alignments per read
 | 
| 
 | 
    34     -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
 | 
| 
 | 
    35     -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
 | 
| 
 | 
    36     -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
 | 
| 
 | 
    37     -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
 | 
| 
 | 
    38     -j, --minInsert=j: Minimum insert size for valid paired-end alignments
 | 
| 
 | 
    39     -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
 | 
| 
 | 
    40     -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
 | 
| 
 | 
    41     -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
 | 
| 
 | 
    42     -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
 | 
| 
 | 
    43     -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
 | 
| 
 | 
    44     -F, --offrate=F: Override the offrate of the index to n
 | 
| 
 | 
    45     -8, --snpphred=8: SNP penalty on Phred scale
 | 
| 
 | 
    46     -6, --snpfrac=6: Fraction of sites expected to be SNP sites
 | 
| 
 | 
    47     -7, --keepends=7: Keep extreme-end nucleotides and qualities
 | 
| 
 | 
    48     -S, --seed=S: Seed for pseudo-random number generator
 | 
| 
 | 
    49     -C, --params=C: Whether to use default or specified parameters
 | 
| 
 | 
    50     -u, --iautoB=u: Automatic or specified behavior
 | 
| 
 | 
    51     -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
 | 
| 
 | 
    52     -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
 | 
| 
 | 
    53     -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
 | 
| 
 | 
    54     -D, --idcv=D: The period for the difference-cover sample
 | 
| 
 | 
    55     -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
 | 
| 
 | 
    56     -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
 | 
| 
 | 
    57     -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
 | 
| 
 | 
    58     -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
 | 
| 
 | 
    59     -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
 | 
| 
 | 
    60     -N, --iendian=N: Endianness to use when serializing integers to the index file
 | 
| 
 | 
    61     -Z, --iseed=Z: Seed for the pseudorandom number generator
 | 
| 
 | 
    62     -c, --icutoff=c: Number of first bases of the reference sequence to index
 | 
| 
 | 
    63     -x, --indexSettings=x: Whether or not indexing options are to be set
 | 
| 
 | 
    64     -H, --suppressHeader=H: Suppress header
 | 
| 
 | 
    65     --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
 | 
| 
 | 
    66 """
 | 
| 
 | 
    67 
 | 
| 
 | 
    68 import optparse, os, shutil, subprocess, sys, tempfile
 | 
| 
 | 
    69 
 | 
| 
 | 
    70 #Allow more than Sanger encoded variants
 | 
| 
 | 
    71 DEFAULT_ASCII_ENCODING = '--phred33-quals'
 | 
| 
 | 
    72 GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = { 'fastqsanger':'--phred33-quals', 'fastqillumina':'--phred64-quals', 'fastqsolexa':'--solexa-quals' }
 | 
| 
 | 
    73 #FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config
 | 
| 
 | 
    74 
 | 
| 
 | 
    75 def stop_err( msg ):
 | 
| 
 | 
    76     sys.stderr.write( '%s\n' % msg )
 | 
| 
 | 
    77     sys.exit()
 | 
| 
 | 
    78 
 | 
| 
 | 
    79 def __main__():
 | 
| 
 | 
    80     #Parse Command Line
 | 
| 
 | 
    81     parser = optparse.OptionParser()
 | 
| 
 | 
    82     parser.add_option( '-t', '--threads', dest='threads', help='The number of threads to run' )
 | 
| 
 | 
    83     parser.add_option( '-o', '--output', dest='output', help='The output file' )
 | 
| 
 | 
    84     parser.add_option( '', '--output_unmapped_reads', dest='output_unmapped_reads', help='File name for unmapped reads (single-end)' )
 | 
| 
 | 
    85     parser.add_option( '', '--output_unmapped_reads_l', dest='output_unmapped_reads_l', help='File name for unmapped reads (left, paired-end)' )
 | 
| 
 | 
    86     parser.add_option( '', '--output_unmapped_reads_r', dest='output_unmapped_reads_r', help='File name for unmapped reads (right, paired-end)' )
 | 
| 
 | 
    87     parser.add_option( '', '--output_suppressed_reads', dest='output_suppressed_reads', help='File name for suppressed reads because of max setting (single-end)' )
 | 
| 
 | 
    88     parser.add_option( '', '--output_suppressed_reads_l', dest='output_suppressed_reads_l', help='File name for suppressed reads because of max setting (left, paired-end)' )
 | 
| 
 | 
    89     parser.add_option( '', '--output_suppressed_reads_r', dest='output_suppressed_reads_r', help='File name for suppressed reads because of max setting (right, paired-end)' )
 | 
| 
 | 
    90     parser.add_option( '-4', '--dataType', dest='dataType', help='The type of data (SOLiD or Solexa)' )
 | 
| 
 | 
    91     parser.add_option( '-i', '--input1', dest='input1', help='The (forward or single-end) reads file in Sanger FASTQ format' )
 | 
| 
 | 
    92     parser.add_option( '-I', '--input2', dest='input2', help='The reverse reads file in Sanger FASTQ format' )
 | 
| 
 | 
    93     parser.add_option( '-2', '--paired', dest='paired', help='Whether the data is single- or paired-end' )
 | 
| 
 | 
    94     parser.add_option( '-g', '--genomeSource', dest='genomeSource', help='The type of reference provided' )
 | 
| 
 | 
    95     parser.add_option( '-r', '--ref', dest='ref', help='The reference genome to use or index' )
 | 
| 
 | 
    96     parser.add_option( '-s', '--skip', dest='skip', help='Skip the first n reads' )
 | 
| 
 | 
    97     parser.add_option( '-a', '--alignLimit', dest='alignLimit', help='Only align the first n reads' )
 | 
| 
 | 
    98     parser.add_option( '-T', '--trimH', dest='trimH', help='Trim n bases from high-quality (left) end of each read before alignment' )
 | 
| 
 | 
    99     parser.add_option( '-L', '--trimL', dest='trimL', help='Trim n bases from low-quality (right) end of each read before alignment' )
 | 
| 
 | 
   100     parser.add_option( '-m', '--mismatchSeed', dest='mismatchSeed', help='Maximum number of mismatches permitted in the seed' )
 | 
| 
 | 
   101     parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' )
 | 
| 
 | 
   102     parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' )
 | 
| 
 | 
   103     parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' )
 | 
| 
 | 
   104     parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' )
 | 
| 
 | 
   105     parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' )
 | 
| 
 | 
   106     parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' )
 | 
| 
 | 
   107     parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' )
 | 
| 
 | 
   108     parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' )
 | 
| 
 | 
   109     parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" )
 | 
| 
 | 
   110     parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' )
 | 
| 
 | 
   111     parser.add_option( '-R', '--strata', dest='strata', help='Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable' )
 | 
| 
 | 
   112     parser.add_option( '-j', '--minInsert', dest='minInsert', help='Minimum insert size for valid paired-end alignments' )
 | 
| 
 | 
   113     parser.add_option( '-J', '--maxInsert', dest='maxInsert', help='Maximum insert size for valid paired-end alignments' )
 | 
| 
 | 
   114     parser.add_option( '-O', '--mateOrient', dest='mateOrient', help='The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand' )
 | 
| 
 | 
   115     parser.add_option( '-A', '--maxAlignAttempt', dest='maxAlignAttempt', help='Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate' )
 | 
| 
 | 
   116     parser.add_option( '-f', '--forwardAlign', dest='forwardAlign', help='Whether or not to attempt to align the forward reference strand' )
 | 
| 
 | 
   117     parser.add_option( '-E', '--reverseAlign', dest='reverseAlign', help='Whether or not to attempt to align the reverse-complement reference strand' )
 | 
| 
 | 
   118     parser.add_option( '-F', '--offrate', dest='offrate', help='Override the offrate of the index to n' )
 | 
| 
 | 
   119     parser.add_option( '-S', '--seed', dest='seed', help='Seed for pseudo-random number generator' )
 | 
| 
 | 
   120     parser.add_option( '-8', '--snpphred', dest='snpphred', help='SNP penalty on Phred scale' )
 | 
| 
 | 
   121     parser.add_option( '-6', '--snpfrac', dest='snpfrac', help='Fraction of sites expected to be SNP sites' )
 | 
| 
 | 
   122     parser.add_option( '-7', '--keepends', dest='keepends', help='Keep extreme-end nucleotides and qualities' )
 | 
| 
 | 
   123     parser.add_option( '-C', '--params', dest='params', help='Whether to use default or specified parameters' )
 | 
| 
 | 
   124     parser.add_option( '-u', '--iautoB', dest='iautoB', help='Automatic or specified behavior' )
 | 
| 
 | 
   125     parser.add_option( '-K', '--ipacked', dest='ipacked', help='Whether or not to use a packed representation for DNA strings' )
 | 
| 
 | 
   126     parser.add_option( '-Q', '--ibmax', dest='ibmax', help='Maximum number of suffixes allowed in a block' )
 | 
| 
 | 
   127     parser.add_option( '-Y', '--ibmaxdivn', dest='ibmaxdivn', help='Maximum number of suffixes allowed in a block as a fraction of the length of the reference' )
 | 
| 
 | 
   128     parser.add_option( '-D', '--idcv', dest='idcv', help='The period for the difference-cover sample' )
 | 
| 
 | 
   129     parser.add_option( '-U', '--inodc', dest='inodc', help='Whether or not to disable the use of the difference-cover sample' )
 | 
| 
 | 
   130     parser.add_option( '-y', '--inoref', dest='inoref', help='Whether or not to build the part of the reference index used only in paired-end alignment' )
 | 
| 
 | 
   131     parser.add_option( '-z', '--ioffrate', dest='ioffrate', help='How many rows get marked during annotation of some or all of the Burrows-Wheeler rows' )
 | 
| 
 | 
   132     parser.add_option( '-W', '--iftab', dest='iftab', help='The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query' )
 | 
| 
 | 
   133     parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' )
 | 
| 
 | 
   134     parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' )
 | 
| 
 | 
   135     parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' )
 | 
| 
 | 
   136     parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' )
 | 
| 
 | 
   137     parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' )
 | 
| 
 | 
   138     parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' )
 | 
| 
 | 
   139     parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' )
 | 
| 
 | 
   140     parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' )
 | 
| 
 | 
   141     (options, args) = parser.parse_args()
 | 
| 
 | 
   142     stdout = ''
 | 
| 
 | 
   143 
 | 
| 
 | 
   144     # make temp directory for placement of indices and copy reference file there if necessary
 | 
| 
 | 
   145     tmp_index_dir = tempfile.mkdtemp()
 | 
| 
 | 
   146     # get type of data (solid or solexa)
 | 
| 
 | 
   147     if options.dataType == 'solid':
 | 
| 
 | 
   148         colorspace = '-C'
 | 
| 
 | 
   149     else:
 | 
| 
 | 
   150         colorspace = ''
 | 
| 
 | 
   151     # index if necessary
 | 
| 
 | 
   152     if options.genomeSource == 'history' and not options.do_not_build_index:
 | 
| 
 | 
   153         # set up commands
 | 
| 
 | 
   154         if options.index_settings =='indexPreSet':
 | 
| 
 | 
   155             indexing_cmds = '%s' % colorspace
 | 
| 
 | 
   156         else:
 | 
| 
 | 
   157             try:
 | 
| 
 | 
   158                 if options.iautoB and options.iautoB == 'set':
 | 
| 
 | 
   159                     iautoB = '--noauto'
 | 
| 
 | 
   160                 else:
 | 
| 
 | 
   161                     iautoB = ''
 | 
| 
 | 
   162                 if options. ipacked and options.ipacked == 'packed':
 | 
| 
 | 
   163                     ipacked = '--packed'
 | 
| 
 | 
   164                 else:
 | 
| 
 | 
   165                     ipacked = ''
 | 
| 
 | 
   166                 if options.ibmax and int( options.ibmax ) >= 1:
 | 
| 
 | 
   167                     ibmax = '--bmax %s' % options.ibmax 
 | 
| 
 | 
   168                 else:
 | 
| 
 | 
   169                     ibmax = ''
 | 
| 
 | 
   170                 if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0:
 | 
| 
 | 
   171                     ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn
 | 
| 
 | 
   172                 else:
 | 
| 
 | 
   173                     ibmaxdivn = ''
 | 
| 
 | 
   174                 if options.idcv and int( options.idcv ) > 0:
 | 
| 
 | 
   175                     idcv = '--dcv %s' % options.idcv
 | 
| 
 | 
   176                 else:
 | 
| 
 | 
   177                     idcv = ''
 | 
| 
 | 
   178                 if options.inodc and options.inodc == 'nodc':
 | 
| 
 | 
   179                     inodc = '--nodc'
 | 
| 
 | 
   180                 else:
 | 
| 
 | 
   181                     inodc = ''
 | 
| 
 | 
   182                 if options.inoref and options.inoref == 'noref':
 | 
| 
 | 
   183                     inoref = '--noref'
 | 
| 
 | 
   184                 else:
 | 
| 
 | 
   185                     inoref = ''
 | 
| 
 | 
   186                 if options.iftab and int( options.iftab ) >= 0:
 | 
| 
 | 
   187                     iftab = '--ftabchars %s' % options.iftab
 | 
| 
 | 
   188                 else:
 | 
| 
 | 
   189                     iftab = ''
 | 
| 
 | 
   190                 if options.intoa and options.intoa == 'yes':
 | 
| 
 | 
   191                     intoa = '--ntoa'
 | 
| 
 | 
   192                 else:
 | 
| 
 | 
   193                     intoa = ''
 | 
| 
 | 
   194                 if options.iendian and options.iendian == 'big':
 | 
| 
 | 
   195                     iendian = '--big'
 | 
| 
 | 
   196                 else:
 | 
| 
 | 
   197                     iendian = '--little'
 | 
| 
 | 
   198                 if options.iseed and int( options.iseed ) > 0:
 | 
| 
 | 
   199                     iseed = '--seed %s' % options.iseed
 | 
| 
 | 
   200                 else:
 | 
| 
 | 
   201                     iseed = ''
 | 
| 
 | 
   202                 if options.icutoff and int( options.icutoff ) > 0:
 | 
| 
 | 
   203                     icutoff = '--cutoff %s' % options.icutoff
 | 
| 
 | 
   204                 else:
 | 
| 
 | 
   205                     icutoff = ''
 | 
| 
 | 
   206                 indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \
 | 
| 
 | 
   207                                 ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, 
 | 
| 
 | 
   208                                   inoref, options.ioffrate, iftab, intoa, iendian, 
 | 
| 
 | 
   209                                   iseed, icutoff, colorspace )
 | 
| 
 | 
   210             except ValueError, e:
 | 
| 
 | 
   211                 # clean up temp dir
 | 
| 
 | 
   212                 if os.path.exists( tmp_index_dir ):
 | 
| 
 | 
   213                     shutil.rmtree( tmp_index_dir )
 | 
| 
 | 
   214                 stop_err( "Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str( e ) )
 | 
| 
 | 
   215         ref_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir )
 | 
| 
 | 
   216         ref_file_name = ref_file.name
 | 
| 
 | 
   217         ref_file.close()
 | 
| 
 | 
   218         os.symlink( options.ref, ref_file_name )
 | 
| 
 | 
   219         cmd1 = 'bowtie-build %s -f %s %s' % ( indexing_cmds, ref_file_name, ref_file_name )
 | 
| 
 | 
   220         try:
 | 
| 
 | 
   221             tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
 | 
| 
 | 
   222             tmp_stderr = open( tmp, 'wb' )
 | 
| 
 | 
   223             proc = subprocess.Popen( args=cmd1, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
 | 
| 
 | 
   224             returncode = proc.wait()
 | 
| 
 | 
   225             tmp_stderr.close()
 | 
| 
 | 
   226             # get stderr, allowing for case where it's very large
 | 
| 
 | 
   227             tmp_stderr = open( tmp, 'rb' )
 | 
| 
 | 
   228             stderr = ''
 | 
| 
 | 
   229             buffsize = 1048576
 | 
| 
 | 
   230             try:
 | 
| 
 | 
   231                 while True:
 | 
| 
 | 
   232                     stderr += tmp_stderr.read( buffsize )
 | 
| 
 | 
   233                     if not stderr or len( stderr ) % buffsize != 0:
 | 
| 
 | 
   234                         break
 | 
| 
 | 
   235             except OverflowError:
 | 
| 
 | 
   236                 pass
 | 
| 
 | 
   237             tmp_stderr.close()
 | 
| 
 | 
   238             if returncode != 0:
 | 
| 
 | 
   239                 raise Exception, stderr
 | 
| 
 | 
   240         except Exception, e:
 | 
| 
 | 
   241             # clean up temp dir
 | 
| 
 | 
   242             if os.path.exists( tmp_index_dir ):
 | 
| 
 | 
   243                 shutil.rmtree( tmp_index_dir )
 | 
| 
 | 
   244             stop_err( 'Error indexing reference sequence\n' + str( e ) )
 | 
| 
 | 
   245         stdout += 'File indexed. '
 | 
| 
 | 
   246     else:
 | 
| 
 | 
   247         ref_file_name = options.ref
 | 
| 
 | 
   248     # set up aligning and generate aligning command options
 | 
| 
 | 
   249     # automatically set threads in both cases
 | 
| 
 | 
   250     tmp_suppressed_file_name = None
 | 
| 
 | 
   251     tmp_unmapped_file_name = None
 | 
| 
 | 
   252     if options.suppressHeader == 'true':
 | 
| 
 | 
   253         suppressHeader = '--sam-nohead'
 | 
| 
 | 
   254     else:
 | 
| 
 | 
   255         suppressHeader = ''
 | 
| 
 | 
   256     if options.maxInsert and int( options.maxInsert ) > 0:
 | 
| 
 | 
   257         maxInsert = '-X %s' % options.maxInsert
 | 
| 
 | 
   258     else:
 | 
| 
 | 
   259         maxInsert = ''
 | 
| 
 | 
   260     if options.mateOrient:
 | 
| 
 | 
   261         mateOrient = '--%s' % options.mateOrient
 | 
| 
 | 
   262     else:
 | 
| 
 | 
   263         mateOrient = ''
 | 
| 
 | 
   264     quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get( options.galaxy_input_format, DEFAULT_ASCII_ENCODING )
 | 
| 
 | 
   265     if options.params == 'preSet':
 | 
| 
 | 
   266         aligning_cmds = '-q %s %s -p %s -S %s %s %s ' % \
 | 
| 
 | 
   267                 ( maxInsert, mateOrient, options.threads, suppressHeader, colorspace, quality_score_encoding )
 | 
| 
 | 
   268     else:
 | 
| 
 | 
   269         try:
 | 
| 
 | 
   270             if options.skip and int( options.skip ) > 0:
 | 
| 
 | 
   271                 skip = '-s %s' % options.skip
 | 
| 
 | 
   272             else:
 | 
| 
 | 
   273                 skip = ''
 | 
| 
 | 
   274             if options.alignLimit and int( options.alignLimit ) >= 0:
 | 
| 
 | 
   275                 alignLimit = '-u %s' % options.alignLimit
 | 
| 
 | 
   276             else:
 | 
| 
 | 
   277                 alignLimit = ''
 | 
| 
 | 
   278             if options.trimH and int( options.trimH ) > 0:
 | 
| 
 | 
   279                 trimH = '-5 %s' % options.trimH
 | 
| 
 | 
   280             else:
 | 
| 
 | 
   281                 trimH = ''
 | 
| 
 | 
   282             if options.trimL and int( options.trimL ) > 0:
 | 
| 
 | 
   283                 trimL = '-3 %s' % options.trimL
 | 
| 
 | 
   284             else:
 | 
| 
 | 
   285                 trimL = ''
 | 
| 
 | 
   286             if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0:
 | 
| 
 | 
   287                 maqSoapAlign = '-v %s' % options.maqSoapAlign
 | 
| 
 | 
   288             else:
 | 
| 
 | 
   289                 maqSoapAlign = ''
 | 
| 
 | 
   290             if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \
 | 
| 
 | 
   291                         or options.mismatchSeed == '2' or options.mismatchSeed == '3'):
 | 
| 
 | 
   292                 mismatchSeed = '-n %s' % options.mismatchSeed
 | 
| 
 | 
   293             else:
 | 
| 
 | 
   294                 mismatchSeed = ''
 | 
| 
 | 
   295             if options.mismatchQual and int( options.mismatchQual ) >= 0:
 | 
| 
 | 
   296                 mismatchQual = '-e %s' % options.mismatchQual
 | 
| 
 | 
   297             else:
 | 
| 
 | 
   298                 mismatchQual = ''
 | 
| 
 | 
   299             if options.seedLen and int( options.seedLen ) >= 5:
 | 
| 
 | 
   300                 seedLen = '-l %s' % options.seedLen
 | 
| 
 | 
   301             else:
 | 
| 
 | 
   302                 seedLen = ''
 | 
| 
 | 
   303             if options.rounding == 'noRound':
 | 
| 
 | 
   304                 rounding = '--nomaqround'
 | 
| 
 | 
   305             else:
 | 
| 
 | 
   306                 rounding = ''
 | 
| 
 | 
   307             if options.minInsert and int( options.minInsert ) > 0:
 | 
| 
 | 
   308                 minInsert = '-I %s' % options.minInsert
 | 
| 
 | 
   309             else:
 | 
| 
 | 
   310                 minInsert = ''
 | 
| 
 | 
   311             if options.maxAlignAttempt and int( options.maxAlignAttempt ) >= 0:
 | 
| 
 | 
   312                 maxAlignAttempt = '--pairtries %s' % options.maxAlignAttempt
 | 
| 
 | 
   313             else:
 | 
| 
 | 
   314                 maxAlignAttempt = ''
 | 
| 
 | 
   315             if options.forwardAlign == 'noForward':
 | 
| 
 | 
   316                 forwardAlign = '--nofw'
 | 
| 
 | 
   317             else:
 | 
| 
 | 
   318                 forwardAlign = ''
 | 
| 
 | 
   319             if options.reverseAlign == 'noReverse':
 | 
| 
 | 
   320                 reverseAlign = '--norc'
 | 
| 
 | 
   321             else:
 | 
| 
 | 
   322                 reverseAlign = ''
 | 
| 
 | 
   323             if options.maxBacktracks and int( options.maxBacktracks ) > 0 and \
 | 
| 
 | 
   324                     ( options.mismatchSeed == '2' or options.mismatchSeed == '3' ):
 | 
| 
 | 
   325                 maxBacktracks = '--maxbts %s' % options.maxBacktracks
 | 
| 
 | 
   326             else:
 | 
| 
 | 
   327                 maxBacktracks = ''
 | 
| 
 | 
   328             if options.tryHard == 'doTryHard':
 | 
| 
 | 
   329                 tryHard = '-y'
 | 
| 
 | 
   330             else:
 | 
| 
 | 
   331                 tryHard = ''
 | 
| 
 | 
   332             if options.valAlign and int( options.valAlign ) >= 0:
 | 
| 
 | 
   333                 valAlign = '-k %s' % options.valAlign
 | 
| 
 | 
   334             else:
 | 
| 
 | 
   335                 valAlign = ''
 | 
| 
 | 
   336             if options.allValAligns == 'doAllValAligns':
 | 
| 
 | 
   337                 allValAligns = '-a'
 | 
| 
 | 
   338             else:
 | 
| 
 | 
   339                 allValAligns = ''
 | 
| 
 | 
   340             if options.suppressAlign and int( options.suppressAlign ) >= 0:
 | 
| 
 | 
   341                 suppressAlign = '-m %s' % options.suppressAlign
 | 
| 
 | 
   342             else:
 | 
| 
 | 
   343                 suppressAlign = ''
 | 
| 
 | 
   344             if options.best == 'doBest':
 | 
| 
 | 
   345                 best = '--best'
 | 
| 
 | 
   346             else:
 | 
| 
 | 
   347                 best = ''
 | 
| 
 | 
   348             if options.strata == 'doStrata':
 | 
| 
 | 
   349                 strata = '--strata'
 | 
| 
 | 
   350             else:
 | 
| 
 | 
   351                 strata = ''
 | 
| 
 | 
   352             if options.offrate and int( options.offrate ) >= 0:
 | 
| 
 | 
   353                 offrate = '-o %s' % options.offrate
 | 
| 
 | 
   354             else:
 | 
| 
 | 
   355                 offrate = ''
 | 
| 
 | 
   356             if options.seed and int( options.seed ) >= 0:
 | 
| 
 | 
   357                 seed = '--seed %s' % options.seed
 | 
| 
 | 
   358             else:
 | 
| 
 | 
   359                 seed = ''
 | 
| 
 | 
   360             if options.paired == 'paired':
 | 
| 
 | 
   361                 if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
 | 
| 
 | 
   362                     tmp_unmapped_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
 | 
| 
 | 
   363                     tmp_unmapped_file_name = tmp_unmapped_file.name
 | 
| 
 | 
   364                     tmp_unmapped_file.close()
 | 
| 
 | 
   365                     output_unmapped_reads = '--un %s' % tmp_unmapped_file_name
 | 
| 
 | 
   366                 else:
 | 
| 
 | 
   367                     output_unmapped_reads = ''
 | 
| 
 | 
   368                 if options.output_suppressed_reads:
 | 
| 
 | 
   369                     tmp_suppressed_file = tempfile.NamedTemporaryFile( dir=tmp_index_dir, suffix='.fastq' )
 | 
| 
 | 
   370                     tmp_suppressed_file_name = tmp_suppressed_file.name
 | 
| 
 | 
   371                     tmp_suppressed_file.close()
 | 
| 
 | 
   372                     output_suppressed_reads = '--max %s' % tmp_suppressed_file_name
 | 
| 
 | 
   373                 else:
 | 
| 
 | 
   374                     output_suppressed_reads = ''
 | 
| 
 | 
   375             else:
 | 
| 
 | 
   376                 if options.output_unmapped_reads:
 | 
| 
 | 
   377                     output_unmapped_reads = '--un %s' % options.output_unmapped_reads
 | 
| 
 | 
   378                 else:
 | 
| 
 | 
   379                     output_unmapped_reads = ''
 | 
| 
 | 
   380                 if options.output_suppressed_reads:
 | 
| 
 | 
   381                     output_suppressed_reads = '--max %s' % options.output_suppressed_reads
 | 
| 
 | 
   382                 else:
 | 
| 
 | 
   383                     output_suppressed_reads = ''
 | 
| 
 | 
   384             snpfrac = ''
 | 
| 
 | 
   385             if options.snpphred and int( options.snpphred ) >= 0:
 | 
| 
 | 
   386                 snpphred = '--snpphred %s' % options.snpphred
 | 
| 
 | 
   387             else:
 | 
| 
 | 
   388                 snpphred = ''
 | 
| 
 | 
   389                 if options.snpfrac and float( options.snpfrac ) >= 0:
 | 
| 
 | 
   390                     snpfrac = '--snpfrac %s' % options.snpfrac
 | 
| 
 | 
   391             if options.keepends and options.keepends == 'doKeepends':
 | 
| 
 | 
   392                 keepends = '--col-keepends'
 | 
| 
 | 
   393             else:
 | 
| 
 | 
   394                 keepends = ''
 | 
| 
 | 
   395             aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \
 | 
| 
 | 
   396                             '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \
 | 
| 
 | 
   397                             ( maxInsert, mateOrient, options.threads, suppressHeader,
 | 
| 
 | 
   398                               colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign,
 | 
| 
 | 
   399                               mismatchSeed, mismatchQual, seedLen, rounding, minInsert, 
 | 
| 
 | 
   400                               maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks,
 | 
| 
 | 
   401                               tryHard, valAlign, allValAligns, suppressAlign, best,
 | 
| 
 | 
   402                               strata, offrate, seed, snpphred, snpfrac, keepends,
 | 
| 
 | 
   403                               output_unmapped_reads, output_suppressed_reads,
 | 
| 
 | 
   404                               quality_score_encoding )
 | 
| 
 | 
   405         except ValueError, e:
 | 
| 
 | 
   406             # clean up temp dir
 | 
| 
 | 
   407             if os.path.exists( tmp_index_dir ):
 | 
| 
 | 
   408                 shutil.rmtree( tmp_index_dir )
 | 
| 
 | 
   409             stop_err( 'Something is wrong with the alignment parameters and the alignment could not be run\n' + str( e ) )
 | 
| 
 | 
   410     try:
 | 
| 
 | 
   411         # have to nest try-except in try-finally to handle 2.4
 | 
| 
 | 
   412         try:
 | 
| 
 | 
   413             # prepare actual mapping commands
 | 
| 
 | 
   414             if options.paired == 'paired':
 | 
| 
 | 
   415                 cmd2 = 'bowtie %s %s -1 %s -2 %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.input2, options.output )
 | 
| 
 | 
   416             else:
 | 
| 
 | 
   417                 cmd2 = 'bowtie %s %s %s > %s' % ( aligning_cmds, ref_file_name, options.input1, options.output )
 | 
| 
 | 
   418             # align
 | 
| 
 | 
   419             tmp = tempfile.NamedTemporaryFile( dir=tmp_index_dir ).name
 | 
| 
 | 
   420             tmp_stderr = open( tmp, 'wb' )
 | 
| 
 | 
   421             proc = subprocess.Popen( args=cmd2, shell=True, cwd=tmp_index_dir, stderr=tmp_stderr.fileno() )
 | 
| 
 | 
   422             returncode = proc.wait()
 | 
| 
 | 
   423             tmp_stderr.close()
 | 
| 
 | 
   424             # get stderr, allowing for case where it's very large
 | 
| 
 | 
   425             tmp_stderr = open( tmp, 'rb' )
 | 
| 
 | 
   426             stderr = ''
 | 
| 
 | 
   427             buffsize = 1048576
 | 
| 
 | 
   428             try:
 | 
| 
 | 
   429                 while True:
 | 
| 
 | 
   430                     stderr += tmp_stderr.read( buffsize )
 | 
| 
 | 
   431                     if not stderr or len( stderr ) % buffsize != 0:
 | 
| 
 | 
   432                         break
 | 
| 
 | 
   433             except OverflowError:
 | 
| 
 | 
   434                 pass
 | 
| 
 | 
   435             tmp_stderr.close()
 | 
| 
 | 
   436             if returncode != 0:
 | 
| 
 | 
   437                 raise Exception, stderr
 | 
| 
 | 
   438             # get suppressed and unmapped reads output files in place if appropriate
 | 
| 
 | 
   439             if options.paired == 'paired' and tmp_suppressed_file_name and \
 | 
| 
 | 
   440                                options.output_suppressed_reads_l and options.output_suppressed_reads_r:
 | 
| 
 | 
   441                 try:
 | 
| 
 | 
   442                     left = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
 | 
| 
 | 
   443                     right = tmp_suppressed_file_name.replace( '.fastq', '_1.fastq' )
 | 
| 
 | 
   444                     shutil.move( left, options.output_suppressed_reads_l )
 | 
| 
 | 
   445                     shutil.move( right, options.output_suppressed_reads_r )
 | 
| 
 | 
   446                 except Exception, e:
 | 
| 
 | 
   447                     sys.stdout.write( 'Error producing the suppressed output file.\n' )
 | 
| 
 | 
   448             if options.paired == 'paired' and tmp_unmapped_file_name and \
 | 
| 
 | 
   449                                options.output_unmapped_reads_l and options.output_unmapped_reads_r:
 | 
| 
 | 
   450                 try:
 | 
| 
 | 
   451                     left = tmp_unmapped_file_name.replace( '.fastq', '_1.fastq' )
 | 
| 
 | 
   452                     right = tmp_unmapped_file_name.replace( '.fastq', '_2.fastq' )
 | 
| 
 | 
   453                     shutil.move( left, options.output_unmapped_reads_l )
 | 
| 
 | 
   454                     shutil.move( right, options.output_unmapped_reads_r )
 | 
| 
 | 
   455                 except Exception, e:
 | 
| 
 | 
   456                     sys.stdout.write( 'Error producing the unmapped output file.\n' )
 | 
| 
 | 
   457             # check that there are results in the output file
 | 
| 
 | 
   458             if os.path.getsize( options.output ) == 0:
 | 
| 
 | 
   459                 raise Exception, 'The output file is empty, there may be an error with your input file or settings.'
 | 
| 
 | 
   460         except Exception, e:
 | 
| 
 | 
   461             stop_err( 'Error aligning sequence. ' + str( e ) )
 | 
| 
 | 
   462     finally:
 | 
| 
 | 
   463         # clean up temp dir
 | 
| 
 | 
   464         if os.path.exists( tmp_index_dir ):
 | 
| 
 | 
   465             shutil.rmtree( tmp_index_dir )
 | 
| 
 | 
   466     stdout += 'Sequence file aligned.\n'
 | 
| 
 | 
   467     sys.stdout.write( stdout )
 | 
| 
 | 
   468 
 | 
| 
 | 
   469 if __name__=="__main__": __main__()
 |