Mercurial > repos > jackcurragh > ribogalaxy_bowtie_genome
changeset 0:71f778b04f6e draft
Uploaded
author | jackcurragh |
---|---|
date | Tue, 22 Mar 2022 12:28:40 +0000 |
parents | |
children | fca653f557d9 |
files | bowtie_genome_wrapper/bowtie_genomic_wrapper.py bowtie_genome_wrapper/bowtie_genomic_wrapper.xml bowtie_genome_wrapper/tool-data/bowtie_indices.loc.sample bowtie_genome_wrapper/tool_data_table_conf.xml.sample |
diffstat | 4 files changed, 1485 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
#!/usr/bin/env python

"""
Runs Bowtie on single-end or paired-end data.

usage: bowtie_genomic_wrapper.py [options]
    -t, --threads=t: The number of threads to run
    -o, --output=o: The output file
    --output_unmapped_reads=: File name for unmapped reads (single-end)
    --output_unmapped_reads_l=: File name for unmapped reads (left, paired-end)
    --output_unmapped_reads_r=: File name for unmapped reads (right, paired-end)
    --output_suppressed_reads=: File name for suppressed reads because of max setting (single-end)
    --output_suppressed_reads_l=: File name for suppressed reads because of max setting (left, paired-end)
    --output_suppressed_reads_r=: File name for suppressed reads because of max setting (right, paired-end)
    --output_mapping_stats=: File name for mapping statistics (output on stderr by bowtie)
    -i, --input1=i: The (forward or single-end) reads file in Sanger FASTQ format
    -I, --input2=I: The reverse reads file in Sanger FASTQ format
    -4, --dataType=4: The type of data (SOLiD or Solexa)
    -2, --paired=2: Whether the data is single- or paired-end
    -g, --genomeSource=g: The type of reference provided
    -r, --ref=r: The reference genome to use or index
    -s, --skip=s: Skip the first n reads
    -a, --alignLimit=a: Only align the first n reads
    -T, --trimH=T: Trim n bases from high-quality (left) end of each read before alignment
    -L, --trimL=L: Trim n bases from low-quality (right) end of each read before alignment
    -m, --mismatchSeed=m: Maximum number of mismatches permitted in the seed
    -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions
    -l, --seedLen=l: Seed length
    -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30
    -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode
    -w, --tryHard=w: Whether or not to try as hard as possible to find valid alignments when they exist
    -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair
    -v, --valAlign=v: Report up to n valid alignments per read or pair
    -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist
    -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions
    -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read
    -R, --strata=R: Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable
    -j, --minInsert=j: Minimum insert size for valid paired-end alignments
    -J, --maxInsert=J: Maximum insert size for valid paired-end alignments
    -O, --mateOrient=O: The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand
    -A, --maxAlignAttempt=A: Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate
    -f, --forwardAlign=f: Whether or not to attempt to align the forward reference strand
    -E, --reverseAlign=E: Whether or not to attempt to align the reverse-complement reference strand
    -F, --offrate=F: Override the offrate of the index to n
    -8, --snpphred=8: SNP penalty on Phred scale
    -6, --snpfrac=6: Fraction of sites expected to be SNP sites
    -7, --keepends=7: Keep extreme-end nucleotides and qualities
    -S, --seed=S: Seed for pseudo-random number generator
    -C, --params=C: Whether to use default or specified parameters
    -u, --iautoB=u: Automatic or specified behavior
    -K, --ipacked=K: Whether or not to use a packed representation for DNA strings
    -Q, --ibmax=Q: Maximum number of suffixes allowed in a block
    -Y, --ibmaxdivn=Y: Maximum number of suffixes allowed in a block as a fraction of the length of the reference
    -D, --idcv=D: The period for the difference-cover sample
    -U, --inodc=U: Whether or not to disable the use of the difference-cover sample
    -y, --inoref=y: Whether or not to build the part of the reference index used only in paired-end alignment
    -z, --ioffrate=z: How many rows get marked during annotation of some or all of the Burrows-Wheeler rows
    -W, --iftab=W: The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query
    -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As
    -N, --iendian=N: Endianness to use when serializing integers to the index file
    -Z, --iseed=Z: Seed for the pseudorandom number generator
    -x, --indexSettings=x: Whether or not indexing options are to be set
    -H, --suppressHeader=H: Suppress header
    --galaxy_input_format=: Galaxy datatype of the reads, used to pick the quality-score encoding
    --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is'
"""

import optparse
import os
import shutil
import subprocess
import sys
import tempfile

# Allow more than Sanger encoded variants
DEFAULT_ASCII_ENCODING = '--phred33-quals'
GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG = {'fastqsanger': '--phred33-quals', 'fastqillumina': '--phred64-quals', 'fastqsolexa': '--solexa-quals'}
# FIXME: Integer quality scores are supported only when the '--integer-quals' argument is specified to bowtie; this is not currently able to be set in the tool/wrapper/config

# Values accepted for both -v (maxMismatches) and -n (mismatchSeed).
_MISMATCH_LEVELS = ('0', '1', '2', '3')

# (short flag or None, long flag, dest, help) for every plain string-valued option.
_STRING_OPTIONS = (
    ('-t', '--threads', 'threads', 'The number of threads to run'),
    ('-o', '--output', 'output', 'The output file'),
    (None, '--output_unmapped_reads', 'output_unmapped_reads', 'File name for unmapped reads (single-end)'),
    (None, '--output_unmapped_reads_l', 'output_unmapped_reads_l', 'File name for unmapped reads (left, paired-end)'),
    (None, '--output_unmapped_reads_r', 'output_unmapped_reads_r', 'File name for unmapped reads (right, paired-end)'),
    (None, '--output_suppressed_reads', 'output_suppressed_reads', 'File name for suppressed reads because of max setting (single-end)'),
    (None, '--output_suppressed_reads_l', 'output_suppressed_reads_l', 'File name for suppressed reads because of max setting (left, paired-end)'),
    (None, '--output_suppressed_reads_r', 'output_suppressed_reads_r', 'File name for suppressed reads because of max setting (right, paired-end)'),
    (None, '--output_mapping_stats', 'output_mapping_stats', 'File for mapping statistics (i.e. stderr from bowtie)'),
    ('-4', '--dataType', 'dataType', 'The type of data (SOLiD or Solexa)'),
    ('-i', '--input1', 'input1', 'The (forward or single-end) reads file in Sanger FASTQ format'),
    ('-I', '--input2', 'input2', 'The reverse reads file in Sanger FASTQ format'),
    ('-2', '--paired', 'paired', 'Whether the data is single- or paired-end'),
    ('-g', '--genomeSource', 'genomeSource', 'The type of reference provided'),
    ('-r', '--ref', 'ref', 'The reference genome to use or index'),
    ('-s', '--skip', 'skip', 'Skip the first n reads'),
    ('-a', '--alignLimit', 'alignLimit', 'Only align the first n reads'),
    ('-T', '--trimH', 'trimH', 'Trim n bases from high-quality (left) end of each read before alignment'),
    ('-L', '--trimL', 'trimL', 'Trim n bases from low-quality (right) end of each read before alignment'),
    ('-m', '--mismatchSeed', 'mismatchSeed', 'Maximum number of mismatches permitted in the seed'),
    ('-M', '--mismatchQual', 'mismatchQual', 'Maximum permitted total of quality values at mismatched read positions'),
    ('-l', '--seedLen', 'seedLen', 'Seed length'),
    ('-n', '--rounding', 'rounding', 'Whether or not to round to the nearest 10 and saturating at 30'),
    ('-P', '--maxMismatches', 'maxMismatches', 'Maximum number of mismatches for -v alignment mode'),
    ('-w', '--tryHard', 'tryHard', 'Whether or not to try as hard as possible to find valid alignments when they exist'),
    ('-V', '--allValAligns', 'allValAligns', 'Whether or not to report all valid alignments per read or pair'),
    ('-v', '--valAlign', 'valAlign', 'Report up to n valid alignments per read or pair'),
    ('-G', '--suppressAlign', 'suppressAlign', 'Suppress all alignments for a read if more than n reportable alignments exist'),
    ('-b', '--best', 'best', "Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions"),
    ('-B', '--maxBacktracks', 'maxBacktracks', 'Maximum number of backtracks permitted when aligning a read'),
    ('-R', '--strata', 'strata', 'Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable'),
    ('-j', '--minInsert', 'minInsert', 'Minimum insert size for valid paired-end alignments'),
    ('-J', '--maxInsert', 'maxInsert', 'Maximum insert size for valid paired-end alignments'),
    ('-O', '--mateOrient', 'mateOrient', 'The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand'),
    ('-A', '--maxAlignAttempt', 'maxAlignAttempt', 'Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate'),
    ('-f', '--forwardAlign', 'forwardAlign', 'Whether or not to attempt to align the forward reference strand'),
    ('-E', '--reverseAlign', 'reverseAlign', 'Whether or not to attempt to align the reverse-complement reference strand'),
    ('-F', '--offrate', 'offrate', 'Override the offrate of the index to n'),
    ('-S', '--seed', 'seed', 'Seed for pseudo-random number generator'),
    ('-8', '--snpphred', 'snpphred', 'SNP penalty on Phred scale'),
    ('-6', '--snpfrac', 'snpfrac', 'Fraction of sites expected to be SNP sites'),
    ('-7', '--keepends', 'keepends', 'Keep extreme-end nucleotides and qualities'),
    ('-C', '--params', 'params', 'Whether to use default or specified parameters'),
    ('-u', '--iautoB', 'iautoB', 'Automatic or specified behavior'),
    ('-K', '--ipacked', 'ipacked', 'Whether or not to use a packed representation for DNA strings'),
    ('-Q', '--ibmax', 'ibmax', 'Maximum number of suffixes allowed in a block'),
    ('-Y', '--ibmaxdivn', 'ibmaxdivn', 'Maximum number of suffixes allowed in a block as a fraction of the length of the reference'),
    ('-D', '--idcv', 'idcv', 'The period for the difference-cover sample'),
    ('-U', '--inodc', 'inodc', 'Whether or not to disable the use of the difference-cover sample'),
    ('-y', '--inoref', 'inoref', 'Whether or not to build the part of the reference index used only in paired-end alignment'),
    ('-z', '--ioffrate', 'ioffrate', 'How many rows get marked during annotation of some or all of the Burrows-Wheeler rows'),
    ('-W', '--iftab', 'iftab', 'The size of the lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query'),
    ('-X', '--intoa', 'intoa', 'Whether or not to convert Ns in the reference sequence to As'),
    ('-N', '--iendian', 'iendian', 'Endianness to use when serializing integers to the index file'),
    ('-Z', '--iseed', 'iseed', 'Seed for the pseudorandom number generator'),
    ('-x', '--indexSettings', 'index_settings', 'Whether or not indexing options are to be set'),
    ('-H', '--suppressHeader', 'suppressHeader', 'Suppress header'),
)


def stop_err(msg):
    """Terminate the script with a non-zero status, printing *msg* on stderr."""
    sys.exit('%s\n' % msg)


def build_parser():
    """Return the optparse parser describing every wrapper option."""
    parser = optparse.OptionParser()
    for short_flag, long_flag, dest, help_text in _STRING_OPTIONS:
        flags = [flag for flag in (short_flag, long_flag) if flag]
        parser.add_option(*flags, dest=dest, help=help_text)
    parser.add_option('--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format')
    parser.add_option('--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is')
    return parser


def int_option(flag, value, minimum):
    """Return ``[flag, value]`` if *value* is set and ``int(value) >= minimum``, else ``[]``.

    Raises ValueError when *value* is set but is not an integer literal.
    """
    if value and int(value) >= minimum:
        return [flag, str(value).strip()]
    return []


def mate_file_names(path):
    """Return the (mate 1, mate 2) file names derived from *path*.

    ``_1`` / ``_2`` is inserted just before the extension, which is the naming
    this wrapper expects bowtie to use for paired-end ``--un`` / ``--max`` files.
    """
    root, ext = os.path.splitext(path)
    return root + '_1' + ext, root + '_2' + ext


def build_index_args(options, colorspace):
    """Return the bowtie-build argument list for user-specified index settings.

    *colorspace* is a (possibly empty) list of extra flags appended at the end.
    Raises ValueError if a numeric indexing option is not numeric.
    """
    args = []
    if options.iautoB == 'set':
        args.append('--noauto')
    if options.ipacked == 'packed':
        args.append('--packed')
    args += int_option('--bmax', options.ibmax, 1)
    args += int_option('--bmaxdivn', options.ibmaxdivn, 0)
    args += int_option('--dcv', options.idcv, 3)
    if options.inodc == 'nodc':
        args.append('--nodc')
    if options.inoref == 'noref':
        args.append('--noref')
    # Only forward --offrate when a value was actually supplied; otherwise
    # bowtie-build would be handed the literal string "None".
    if options.ioffrate is not None:
        args += ['--offrate', str(options.ioffrate)]
    args += int_option('--ftabchars', options.iftab, 1)
    if options.intoa == 'yes':
        args.append('--ntoa')
    args.append('--big' if options.iendian == 'big' else '--little')
    args += int_option('--seed', options.iseed, 1)
    args += colorspace
    return args


def build_align_args(options, colorspace, tmp_dir):
    """Return ``(args, tmp_unmapped, tmp_suppressed)`` for the bowtie call.

    *args* is the list of alignment flags (without the program name, index or
    read files).  *tmp_unmapped* / *tmp_suppressed* are the temporary
    ``--un`` / ``--max`` base names inside *tmp_dir* used for paired-end data,
    or None when the corresponding outputs were not requested.
    Raises ValueError if a numeric alignment option is not numeric.
    """
    args = ['-q']
    args += int_option('-X', options.maxInsert, 1)
    if options.mateOrient:
        args.append('--%s' % options.mateOrient)
    args += ['-p', str(options.threads), '-S']
    if options.suppressHeader == 'true':
        args.append('--sam-nohead')
    args += colorspace
    quality_score_encoding = GALAXY_FORMAT_TO_QUALITY_SCORE_ENCODING_ARG.get(
        options.galaxy_input_format, DEFAULT_ASCII_ENCODING)
    tmp_unmapped = None
    tmp_suppressed = None
    if options.params != 'preSet':
        args += int_option('-s', options.skip, 1)
        args += int_option('-u', options.alignLimit, 0)
        args += int_option('-5', options.trimH, 1)
        args += int_option('-3', options.trimL, 1)
        if options.maxMismatches in _MISMATCH_LEVELS:
            args += ['-v', options.maxMismatches]
        if options.mismatchSeed in _MISMATCH_LEVELS:
            args += ['-n', options.mismatchSeed]
        args += int_option('-e', options.mismatchQual, 1)
        args += int_option('-l', options.seedLen, 5)
        if options.rounding == 'noRound':
            args.append('--nomaqround')
        args += int_option('-I', options.minInsert, 1)
        args += int_option('--pairtries', options.maxAlignAttempt, 0)
        if options.forwardAlign == 'noForward':
            args.append('--nofw')
        if options.reverseAlign == 'noReverse':
            args.append('--norc')
        # The value is validated even when it ends up unused, so a malformed
        # number is still reported.
        max_backtracks = int_option('--maxbts', options.maxBacktracks, 1)
        if options.mismatchSeed in ('2', '3'):
            args += max_backtracks
        if options.tryHard == 'doTryHard':
            args.append('-y')
        args += int_option('-k', options.valAlign, 0)
        if options.allValAligns == 'doAllValAligns':
            args.append('-a')
        args += int_option('-m', options.suppressAlign, 0)
        if options.best == 'doBest':
            args.append('--best')
        if options.strata == 'doStrata':
            args.append('--strata')
        args += int_option('-o', options.offrate, 0)
        args += int_option('--seed', options.seed, 0)
        args += int_option('--snpphred', options.snpphred, 0)
        if options.snpfrac and float(options.snpfrac) >= 0:
            args += ['--snpfrac', str(options.snpfrac).strip()]
        if options.keepends == 'doKeepends':
            args.append('--col-keepends')
        if options.paired == 'paired':
            # Paired-end side outputs are written into the private temp dir
            # and moved to their final destinations after the alignment.
            if options.output_unmapped_reads_l and options.output_unmapped_reads_r:
                tmp_unmapped = os.path.join(tmp_dir, 'unmapped.fastq')
            if options.output_suppressed_reads_l and options.output_suppressed_reads_r:
                tmp_suppressed = os.path.join(tmp_dir, 'suppressed.fastq')
            unmapped_target = tmp_unmapped
            suppressed_target = tmp_suppressed
        else:
            unmapped_target = options.output_unmapped_reads
            suppressed_target = options.output_suppressed_reads
        if unmapped_target:
            args += ['--un', unmapped_target]
        if suppressed_target:
            args += ['--max', suppressed_target]
    args.append(quality_score_encoding)
    return args, tmp_unmapped, tmp_suppressed


def run_command(args, cwd, stdout_path=None):
    """Run *args* (a list, no shell) in *cwd* and return ``(returncode, stderr)``.

    The child's stdout is inherited unless *stdout_path* is given, in which
    case it is written to that file.  stderr is spooled through a temporary
    file so that large outputs cannot block the child.
    Raises OSError if the program cannot be started.
    """
    with tempfile.TemporaryFile(mode='w+', dir=cwd) as err_file:
        if stdout_path is None:
            returncode = subprocess.call(args, cwd=cwd, stderr=err_file)
        else:
            with open(stdout_path, 'w') as out_file:
                returncode = subprocess.call(args, cwd=cwd, stdout=out_file, stderr=err_file)
        err_file.seek(0)
        return returncode, err_file.read()


def build_index(options, colorspace, tmp_dir):
    """Index ``options.ref`` with bowtie-build inside *tmp_dir*.

    Returns the index base name.  Exits through stop_err() when the indexing
    parameters are invalid or bowtie-build fails.
    """
    if options.index_settings == 'indexPreSet':
        indexing_args = list(colorspace)
    else:
        try:
            indexing_args = build_index_args(options, colorspace)
        except ValueError as e:
            stop_err("Something is wrong with the indexing parameters and the indexing and alignment could not be run. Make sure you don't have any non-numeric values where they should be numeric.\n" + str(e))
    # tmp_dir is freshly created and private, so a fixed name cannot collide.
    ref_file_name = os.path.join(tmp_dir, 'reference')
    # Absolute target so the link stays valid regardless of the child's cwd.
    os.symlink(os.path.abspath(options.ref), ref_file_name)
    command = ['bowtie-build'] + indexing_args + ['-f', ref_file_name, ref_file_name]
    try:
        returncode, stderr = run_command(command, cwd=tmp_dir)
    except OSError as e:
        stop_err('Error indexing reference sequence\n' + str(e))
    if returncode != 0:
        stop_err('Error indexing reference sequence\n' + stderr)
    return ref_file_name


def move_mate_files(tmp_name, left_target, right_target, label):
    """Move the two mate files derived from *tmp_name* to their targets.

    This is best effort: a failure is reported on stdout using *label*
    instead of aborting the run.
    """
    left, right = mate_file_names(tmp_name)
    try:
        shutil.move(left, left_target)
        shutil.move(right, right_target)
    except (IOError, OSError, shutil.Error):
        sys.stdout.write('Error producing the %s output file.\n' % label)


def run_alignment(options, aligning_args, ref_file_name, tmp_dir, tmp_unmapped, tmp_suppressed):
    """Run bowtie and put every requested output file in place.

    Raises RuntimeError when bowtie fails or produces an empty output file.
    """
    paired = options.paired == 'paired'
    command = ['bowtie'] + aligning_args + [ref_file_name]
    if paired:
        command += ['-1', options.input1, '-2', options.input2]
    else:
        command.append(options.input1)
    returncode, stderr = run_command(command, cwd=tmp_dir, stdout_path=options.output)
    if returncode != 0:
        raise RuntimeError(stderr)
    if options.output_mapping_stats is not None:
        # bowtie reports the mapping statistics on stderr
        with open(options.output_mapping_stats, 'w') as mapping_stats:
            mapping_stats.write(stderr)
    if paired and tmp_suppressed:
        move_mate_files(tmp_suppressed, options.output_suppressed_reads_l,
                        options.output_suppressed_reads_r, 'suppressed')
    if paired and tmp_unmapped:
        move_mate_files(tmp_unmapped, options.output_unmapped_reads_l,
                        options.output_unmapped_reads_r, 'unmapped')
    if os.path.getsize(options.output) == 0:
        raise RuntimeError('The output file is empty, there may be an error with your input file or settings.')


def __main__():
    """Parse the command line, optionally build an index, then run bowtie.

    Exits with an error message through stop_err() on any failure.  The
    temporary working directory is removed on every exit path.
    """
    parser = build_parser()
    (options, args) = parser.parse_args()
    if options.mismatchSeed and options.maxMismatches:
        parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive")
    stdout = ''

    # temp directory for the index, the reference link and paired-end side outputs
    tmp_index_dir = tempfile.mkdtemp()
    try:
        # type of data (solid or solexa)
        colorspace = ['-C'] if options.dataType == 'solid' else []
        # index if necessary
        if options.genomeSource == 'history' and not options.do_not_build_index:
            ref_file_name = build_index(options, colorspace, tmp_index_dir)
            stdout += 'File indexed. '
        else:
            ref_file_name = options.ref
        try:
            aligning_args, tmp_unmapped, tmp_suppressed = build_align_args(
                options, colorspace, tmp_index_dir)
        except ValueError as e:
            stop_err('Something is wrong with the alignment parameters and the alignment could not be run\n' + str(e))
        try:
            run_alignment(options, aligning_args, ref_file_name, tmp_index_dir,
                          tmp_unmapped, tmp_suppressed)
        except Exception as e:
            # top-level boundary: turn any failure into a clean tool error
            stop_err('Error aligning sequence. ' + str(e))
    finally:
        # clean up temp dir
        if os.path.exists(tmp_index_dir):
            shutil.rmtree(tmp_index_dir)
    stdout += 'Sequence file aligned.\n'
    sys.stdout.write(stdout)


if __name__ == "__main__":
    __main__()
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie_genome_wrapper/bowtie_genomic_wrapper.xml Tue Mar 22 12:28:40 2022 +0000 @@ -0,0 +1,968 @@ +<tool id="bowtie_genomic_wrapper" name="Align to the Genome with Bowtie" version="1.2.0"> + <description></description> + <requirements> + <requirement type="package" version="1.2.0">bowtie</requirement> + </requirements> + <version_command>bowtie --version</version_command> + <command> + python '$__tool_directory__/bowtie_genomic_wrapper.py' + ## Set number of threads + --threads="\${GALAXY_SLOTS:-4}" + ## Outputs + --output="${output}" + #if str( $singlePaired.sPaired ) == "single" + #if $output_unmapped_reads_l + --output_unmapped_reads="${output_unmapped_reads_l}" + #end if + #if $output_suppressed_reads_l + --output_suppressed_reads="${output_suppressed_reads_l}" + #end if + --galaxy_input_format="${singlePaired.sInput1.ext}" + #else + #if $output_unmapped_reads_l and $output_unmapped_reads_r + --output_unmapped_reads_l="${output_unmapped_reads_l}" + --output_unmapped_reads_r="${output_unmapped_reads_r}" + #end if + #if $output_suppressed_reads_l and $output_suppressed_reads_l + --output_suppressed_reads_l="${output_suppressed_reads_l}" + --output_suppressed_reads_r="${output_suppressed_reads_r}" + #end if + --galaxy_input_format="${singlePaired.pInput1.ext}" + #end if + ## Inputs + --dataType="solexa" ##this indicates that nucleotide base space is used in the wrapper + --suppressHeader="${suppressHeader}" + --genomeSource="${refGenomeSource.genomeSource}" + #if $refGenomeSource.genomeSource == "history": + ##index already exists + #if $refGenomeSource.ownFile.extension.startswith( 'bowtie_' ): + ##user previously built + --ref="${refGenomeSource.ownFile.extra_files_path}/${refGenomeSource.ownFile.metadata.base_name}" + --do_not_build_index + #else: + ##build index on the fly + --ref="${refGenomeSource.ownFile}" + --indexSettings="${refGenomeSource.indexParams.indexSettings}" + #if 
$refGenomeSource.indexParams.indexSettings == "indexFull": + --iautoB="${refGenomeSource.indexParams.autoBehavior.autoB}" + #if $refGenomeSource.indexParams.autoBehavior.autoB == "set": + --ipacked="${refGenomeSource.indexParams.autoBehavior.packed}" + --ibmax="${refGenomeSource.indexParams.autoBehavior.bmax}" + --ibmaxdivn="${refGenomeSource.indexParams.autoBehavior.bmaxdivn}" + --idcv="${refGenomeSource.indexParams.autoBehavior.dcv}" + #end if + --inodc="${refGenomeSource.indexParams.nodc}" + --inoref="${refGenomeSource.indexParams.noref}" + --ioffrate="${refGenomeSource.indexParams.offrate}" + --iftab="${refGenomeSource.indexParams.ftab}" + --intoa="${refGenomeSource.indexParams.ntoa}" + --iendian="${refGenomeSource.indexParams.endian}" + --iseed="${refGenomeSource.indexParams.seed}" + #end if + #end if + #else + ##use pre-built index + --ref="${refGenomeSource.index.fields.path}" + #end if + --paired="${singlePaired.sPaired}" + #if $singlePaired.sPaired == "single": + --input1="${singlePaired.sInput1}" + --params="${singlePaired.sParams.sSettingsType}" + #if $singlePaired.sParams.sSettingsType == "full": + --skip="${singlePaired.sParams.sSkip}" + --alignLimit="${singlePaired.sParams.sAlignLimit}" + --trimH="${singlePaired.sParams.sTrimH}" + --trimL="${singlePaired.sParams.sTrimL}" + #if $singlePaired.sParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.sParams.alignModeOption.sMismatchSeed}" + --mismatchQual="${singlePaired.sParams.alignModeOption.sMismatchQual}" + --seedLen="${singlePaired.sParams.alignModeOption.sSeedLen}" + --rounding="${singlePaired.sParams.alignModeOption.sRounding}" + #else + --maxMismatches="${singlePaired.sParams.alignModeOption.maxMismatches}" + #end if + --forwardAlign="${singlePaired.sParams.sForwardAlign}" + --reverseAlign="${singlePaired.sParams.sReverseAlign}" + --tryHard="${singlePaired.sParams.sBestOption.sTryHardOption.sTryHard}" + 
--allValAligns="${singlePaired.sParams.sAllValAlignsOption.sAllValAligns}" + #if $singlePaired.sParams.sAllValAlignsOption.sAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.sParams.sAllValAlignsOption.sValAlign}" + #end if + --suppressAlign="${singlePaired.sParams.sSuppressAlign}" + --best="${singlePaired.sParams.sBestOption.sBest}" + #if $singlePaired.sParams.sBestOption.sBest == "doBest": + --strata="${singlePaired.sParams.sBestOption.sdStrata}" + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.sdMaxBacktracks}" + #end if + #else: + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.snMaxBacktracks}" + #end if + #end if + --offrate="${singlePaired.sParams.sOffrate}" + --seed="${singlePaired.sParams.sSeed}" + #end if + #else: + --input1="${singlePaired.pInput1}" + --input2="${singlePaired.pInput2}" + --maxInsert="${singlePaired.pMaxInsert}" + --mateOrient="${singlePaired.pMateOrient}" + --params="${singlePaired.pParams.pSettingsType}" + #if $singlePaired.pParams.pSettingsType == "full": + --skip="${singlePaired.pParams.pSkip}" + --alignLimit="${singlePaired.pParams.pAlignLimit}" + --trimH="${singlePaired.pParams.pTrimH}" + --trimL="${singlePaired.pParams.pTrimL}" + #if $singlePaired.pParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.pParams.alignModeOption.pMismatchSeed}" + --mismatchQual="${singlePaired.pParams.alignModeOption.pMismatchQual}" + --seedLen="${singlePaired.pParams.alignModeOption.pSeedLen}" + --rounding="${singlePaired.pParams.alignModeOption.pRounding}" + #else + --maxMismatches="${singlePaired.pParams.alignModeOption.maxMismatches}" + #end if + --minInsert="${singlePaired.pParams.pMinInsert}" + --forwardAlign="${singlePaired.pParams.pForwardAlign}" + --reverseAlign="${singlePaired.pParams.pReverseAlign}" + 
--tryHard="${singlePaired.pParams.pBestOption.pTryHardOption.pTryHard}" + --allValAligns="${singlePaired.pParams.pAllValAlignsOption.pAllValAligns}" + #if $singlePaired.pParams.pAllValAlignsOption.pAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.pParams.pAllValAlignsOption.pValAlign}" + #end if + --suppressAlign="${singlePaired.pParams.pSuppressAlign}" + --best="${singlePaired.pParams.pBestOption.pBest}" + #if $singlePaired.pParams.pBestOption.pBest == "doBest": + --strata="${singlePaired.pParams.pBestOption.pdStrata}" + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pdMaxBacktracks}" + #end if + #else: + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pnMaxBacktracks}" + #end if + #end if + --offrate="${singlePaired.pParams.pOffrate}" + --seed="${singlePaired.pParams.pSeed}" + #end if + #end if + #if $save_mapping_stats + --output_mapping_stats="$mapping_stats" + #end if + </command> + <inputs> + <conditional name="refGenomeSource"> + <param name="genomeSource" type="select" label="Will you select a reference genome from your history or use a built-in index?" 
help="Built-ins were indexed using default options"> + <option value="indexed">Use a built-in index</option> + <option value="history">Use one from the history</option> + </param> + <when value="indexed"> + <param name="index" type="select" label="Select a reference genome" help="if your genome of interest is not listed - contact Galaxy team"> + <options from_data_table="bowtie_indexes"> + <filter type="sort_by" column="2" /> + <validator type="no_options" message="No indexes are available" /> + </options> + </param> + </when> + <when value="history"> + <param name="ownFile" type="data" format="bowtie_base_index,fasta" label="Select the reference genome" /> + <conditional name="indexParams"> + <param name="indexSettings" type="select" label="Choose whether to use Default options for building indices or to Set your own" help="These settings are ignored when using a prebuilt index"> + <option value="indexPreSet">Default</option> + <option value="indexFull">Set your own</option> + </param> + <when value="indexPreSet" /> + <when value="indexFull"> + <conditional name="autoBehavior"> + <param name="autoB" type="select" label="Choose to use automatic or specified behavior for some parameters (-a)" help="Allows you to set --packed, --bmax, --bmaxdivn, and --dcv"> + <option value="auto">Automatic behavior</option> + <option value="set">Set values (sets --noauto and allows others to be set)</option> + </param> + <when value="auto" /> + <when value="set"> + <param name="packed" type="select" label="Whether or not to use a packed representation for DNA strings (--packed)" help="Packed representation saves memory but makes indexing 2-3 times slower"> + <option value="unpacked">Use regular representation</option> + <option value="packed">Use packed representation</option> + </param> + <param name="bmax" type="integer" value="-1" label="Maximum number of suffixes allowed in a block (--bmax)" help="-1 for not specified. 
Must be at least 1" /> + <param name="bmaxdivn" type="integer" value="4" label="Maximum number of suffixes allowed in a block as a fraction of the length of the reference (--bmaxdivn)" /> + <param name="dcv" type="integer" value="1024" min="3" label="The period for the difference-cover sample (--dcv)" help="A larger period yields less memory overhead, but may make suffix sorting slower, especially if repeats are present" /> + </when> + </conditional> + <param name="nodc" type="select" label="Whether or not to disable the use of the difference-cover sample (--nodc)" help="Suffix sorting becomes quadratic-time in the worst case (with a very repetitive reference)"> + <option value="dc">Use difference-cover sample</option> + <option value="nodc">Disable difference-cover sample</option> + </param> + <param name="noref" type="select" label="Whether or not to build the part of the reference index used only in paired-end alignment (-r)"> + <option value="ref">Build all index files</option> + <option value="noref">Do not build paired-end alignment index files</option> + </param> + <param name="offrate" type="integer" value="5" min="0" label="The indexer will mark every 2^n Burrows-Wheeler rows with their corresponding location on the genome (-o)" help="Marking more rows makes reference-position lookups faster, but requires more memory to hold the annotations at runtime" /> + <param name="ftab" type="integer" value="10" min="1" label="The size of the ftab lookup table used to calculate an initial Burrows-Wheeler range with respect to the first n characters of the query (-t)" help="ftab size is 4^(n+1) bytes" /> + <param name="ntoa" type="select" label="Whether or not to convert Ns in the reference sequence to As (--ntoa)"> + <option value="no">Do not convert Ns</option> + <option value="yes">Convert Ns to As</option> + </param> + <param name="endian" type="select" label="Endianness to use when serializing integers to the index file (--big/--little)" help="Little is most 
appropriate for Intel- and AMD-based architecture"> + <option value="little">Little</option> + <option value="big">Big</option> + </param> + <param name="seed" type="integer" value="-1" label="Seed for the pseudorandom number generator (--seed)" help="Use -1 to use default" /> + </when> <!-- indexFull --> + </conditional> <!-- indexParams --> + </when> <!-- history --> + </conditional> <!-- refGenomeSource --> + <conditional name="singlePaired"> + <param name="sPaired" type="select" label="Is this library mate-paired?"> + <option value="single">Single-end</option> + <option value="paired">Paired-end</option> + </param> + <when value="single"> + <param name="sInput1" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="FASTQ file" help="Must have ASCII encoded quality scores"/> + <conditional name="sParams"> + <param name="sSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> + <option value="preSet">Commonly used</option> + <option value="full" selected="true">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="sSkip" type="integer" value="0" label="Skip the first n reads (-s)" /> + <param name="sAlignLimit" type="integer" value="-1" label="Only align the first n reads (-u)" help="-1 for off" /> + <param name="sTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" /> + <param name="sTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" /> + <conditional name="alignModeOption"> + <param name="alignMode" type="select" label="Alignment mode"> + <option value="nMode">Maq-like: quality-aware, limit mismatches in seed (-n)</option> + <option value="vMode">ignore qualities, limit end-to-end mismatches (-v)</option> + </param> + <when value="nMode"> + <param 
name="sMismatchSeed" type="integer" value="2" min="0" max="3" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" /> + <param name="sMismatchQual" type="integer" value="70" min="1" label="Maximum permitted total of quality values at all mismatched read positions (-e)" /> + <param name="sSeedLen" type="integer" value="25" min="5" label="Seed length (-l)" help="Minimum value is 5" /> + <param name="sRounding" type="select" label="Whether or not to round to the nearest 10 and saturating at 30 (--nomaqround)" help="Maq accepts quality values in the Phred quality scale, but internally rounds values to the nearest 10, with a maximum of 30. By default, bowtie also rounds this way"> + <option value="round">Round to nearest 10</option> + <option value="noRound">Do not round to nearest 10</option> + </param> + </when> + <when value="vMode"> + <param name="maxMismatches" type="integer" value="" min="0" max="3" label="Maximum number of mismatches (-v)" help="May be 0, 1, 2, or 3" /> + </when> + </conditional> + <param name="sForwardAlign" type="select" label="Choose whether or not to attempt to align against the forward reference strand (--nofw)"> + <option value="forward">Align against the forward reference strand</option> + <option value="noForward">Do not align against the forward reference strand</option> + </param> + <param name="sReverseAlign" type="select" label="Choose whether or not to attempt to align against the reverse-complement reference strand (--norc)"> + <option value="reverse">Align against the reverse-complement reference strand</option> + <option value="noReverse">Do not align against the reverse-complement reference strand</option> + </param> + <conditional name="sBestOption"> + <param name="sBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all 
strand bias. Only affects which alignments are reported by Bowtie. Runs slower with best option"> + <option value="noBest">Do not use best</option> + <option value="doBest">Use best</option> + </param> + <when value="noBest"> + <conditional name="sTryHardOption"> + <param name="sTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <when value="noTryHard"> + <param name="snMaxBacktracks" type="integer" value="125" min="0" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + </when> + <when value="doTryHard" /> + </conditional> + </when> + <when value="doBest"> + <param name="sdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)"> + <option value="noStrata">Do not use strata option</option> + <option value="doStrata">Use strata option</option> + </param> + <conditional name="sTryHardOption"> + <param name="sTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <when value="noTryHard"> + <param name="sdMaxBacktracks" type="integer" value="800" min="0" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + </when> + <when value="doTryHard" /> + </conditional> + </when> + </conditional> <!-- bestOption --> + <conditional name="sAllValAlignsOption"> + <param name="sAllValAligns" type="select" label="Whether or not to report all valid alignments per read (-a)"> + <option value="noAllValAligns">Do not report all valid alignments</option> + <option 
value="doAllValAligns">Report all valid alignments</option> + </param> + <when value="noAllValAligns"> + <param name="sValAlign" type="integer" value="1" min="1" label="Report up to n valid alignments per read (-k)" /> + </when> + <when value="doAllValAligns" /> + </conditional> + <param name="sSuppressAlign" type="integer" value="1" label="Suppress all alignments for a read if more than n reportable alignments exist (-m)" help="-1 for no limit" /> + <param name="sMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" /> + <param name="sUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" /> + <param name="sOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" /> + <param name="sSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" /> + </when> <!-- full --> + </conditional> <!-- sParams --> + </when> <!-- single --> + <when value="paired"> + <param name="pInput1" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="Forward FASTQ file" help="Must have ASCII encoded quality scores"/> + <param name="pInput2" type="data" format="fastqsanger,fastqillumina,fastqsolexa" label="Reverse FASTQ file" help="File format must match the Forward FASTQ file"> + <options options_filter_attribute="ext" from_parameter="tool.app.datatypes_registry.datatypes_by_extension" transform_lines="obj.keys()"> + <column name="name" index="0"/> + <column name="value" index="0"/> + <filter type="param_value" ref="pInput1" ref_attribute="ext" column="0"/> + </options> + </param> + <param name="pMaxInsert" type="integer" value="1000" label="Maximum insert size for valid paired-end alignments (-X)" /> + <param name="pMateOrient" type="select" 
label="The upstream/downstream mate orientation for valid paired-end alignment against the forward reference strand (--fr/--rf/--ff)"> + <option value="fr">FR (for Illumina)</option> + <option value="rf">RF</option> + <option value="ff">FF (for SOLiD)</option> + </param> + <conditional name="pParams"> + <param name="pSettingsType" type="select" label="Bowtie settings to use" help="For most mapping needs use Commonly used settings. If you want full control use Full parameter list"> + <option value="preSet">Commonly used</option> + <option value="full">Full parameter list</option> + </param> + <when value="preSet" /> + <when value="full"> + <param name="pSkip" type="integer" value="0" label="Skip the first n pairs (-s)" /> + <param name="pAlignLimit" type="integer" value="-1" label="Only align the first n pairs (-u)" help="-1 for off" /> + <param name="pTrimH" type="integer" value="0" label="Trim n bases from high-quality (left) end of each read before alignment (-5)" /> + <param name="pTrimL" type="integer" value="0" label="Trim n bases from low-quality (right) end of each read before alignment (-3)" /> + <conditional name="alignModeOption"> + <param name="alignMode" type="select" label="Alignment mode"> + <option value="nMode" selected="true">Maq-like: quality-aware, limit mismatches in seed (-n)</option> + <option value="vMode">ignore qualities, limit end-to-end mismatches (-v)</option> + </param> + <when value="nMode"> + <param name="pMismatchSeed" type="integer" value="2" min="0" max="3" label="Maximum number of mismatches permitted in the seed (-n)" help="May be 0, 1, 2, or 3" /> + <param name="pMismatchQual" type="integer" value="70" min="1" label="Maximum permitted total of quality values at all mismatched read positions (-e)" /> + <param name="pSeedLen" type="integer" value="25" min="5" label="Seed length (-l)" help="Minimum value is 5" /> + <param name="pRounding" type="select" label="Whether or not to round to the nearest 10 and saturating at 30 
(--nomaqround)" help="Maq accepts quality values in the Phred quality scale, but internally rounds values to the nearest 10, with a maximum of 30. By default, bowtie also rounds this way"> + <option value="round">Round to nearest 10</option> + <option value="noRound">Do not round to nearest 10</option> + </param> + </when> + <when value="vMode"> + <param name="maxMismatches" type="integer" value="" min="0" max="3" label="Maximum number of mismatches (-v)" help="May be 0, 1, 2, or 3" /> + </when> + </conditional> + <param name="pMinInsert" type="integer" value="0" label="Minimum insert size for valid paired-end alignments (-I)" /> + <param name="pForwardAlign" type="select" label="Choose whether or not to attempt to align against the forward reference strand (--nofw)"> + <option value="forward">Align against the forward reference strand</option> + <option value="noForward">Do not align against the forward reference strand</option> + </param> + <param name="pReverseAlign" type="select" label="Choose whether or not to attempt to align against the reverse-complement reference strand (--norc)"> + <option value="reverse">Align against the reverse-complement reference strand</option> + <option value="noReverse">Do not align against the reverse-complement reference strand</option> + </param> + <conditional name="pBestOption"> + <param name="pBest" type="select" label="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions (--best)" help="Removes all strand bias. Only affects which alignments are reported by Bowtie. 
Runs slower with best option"> + <option value="noBest">Do not use best</option> + <option value="doBest">Use best</option> + </param> + <when value="noBest"> + <conditional name="pTryHardOption"> + <param name="pTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <when value="noTryHard"> + <param name="pMaxAlignAttempt" type="integer" value="100" min="1" label="Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate (--pairtries)" /> + <param name="pnMaxBacktracks" type="integer" value="125" min="0" label="Maximum number of backtracks permitted when aligning a read (--maxbts)" /> + </when> + <when value="doTryHard" /> + </conditional> + </when> + <when value="doBest"> + <param name="pdStrata" type="select" label="Whether or not to report only those alignments that fall in the best stratum if many valid alignments exist and are reportable (--strata)"> + <option value="noStrata">Do not use strata option</option> + <option value="doStrata">Use strata option</option> + </param> + <conditional name="pTryHardOption"> + <param name="pTryHard" type="select" label="Whether or not to try as hard as possible to find valid alignments when they exist (-y)" help="Tryhard mode is much slower than regular mode"> + <option value="noTryHard">Do not try hard</option> + <option value="doTryHard">Try hard</option> + </param> + <when value="noTryHard"> + <param name="pMaxAlignAttempt" type="integer" value="100" min="1" label="Maximum number of attempts Bowtie will make to match an alignment for one mate with an alignment for the opposite mate (--pairtries)" /> + <param name="pdMaxBacktracks" type="integer" value="800" min="0" label="Maximum number of backtracks permitted when aligning a read 
(--maxbts)" /> + </when> + <when value="doTryHard" /> + </conditional> + </when> + </conditional> + <conditional name="pAllValAlignsOption"> + <param name="pAllValAligns" type="select" label="Whether or not to report all valid alignments per pair (-a)"> + <option value="noAllValAligns">Do not report all valid alignments</option> + <option value="doAllValAligns">Report all valid alignments</option> + </param> + <when value="noAllValAligns"> + <param name="pValAlign" type="integer" value="1" min="1" label="Report up to n valid alignments per pair (-k)" /> + </when> + <when value="doAllValAligns" /> + </conditional> + <param name="pSuppressAlign" type="integer" value="-1" label="Suppress all alignments for a pair if more than n reportable alignments exist (-m)" help="-1 for no limit" /> + <param name="pMaxFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads with a number of valid alignments exceeding the limit set with the -m option to a file (--max)" /> + <param name="pUnmappedFile" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Write all reads that could not be aligned to a file (--un)" /> + <param name="pOffrate" type="integer" value="-1" label="Override the offrate of the index to n (-o)" help="-1 for default" /> + <param name="pSeed" type="integer" value="-1" label="Seed for pseudo-random number generator (--seed)" help="-1 for default" /> + </when> <!-- full --> + </conditional> <!-- pParams --> + </when> <!-- paired --> + </conditional> <!-- singlePaired --> + <param name="save_mapping_stats" type="boolean" checked="False" label="Save the bowtie mapping statistics to the history" /> + <param name="suppressHeader" type="boolean" truevalue="true" falsevalue="false" checked="False" label="Suppress the header in the output SAM file (--sam-nohead)" help="Bowtie produces SAM with several lines of header information by default" /> + </inputs> + <outputs> + <data format="sam" name="output" 
label="${tool.name} on ${on_string}: mapped reads"> + <actions> + <conditional name="refGenomeSource.genomeSource"> + <when value="indexed"> + <action type="metadata" name="dbkey"> + <option type="from_data_table" name="bowtie_indexes" column="1" offset="0"> + <filter type="param_value" column="0" value="#" compare="startswith" keep="False"/> + <filter type="param_value" ref="refGenomeSource.index" column="0"/> + </option> + </action> + </when> + <when value="history"> + <action type="metadata" name="dbkey"> + <option type="from_param" name="refGenomeSource.ownFile" param_attribute="dbkey" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="txt" name="mapping_stats" label="${tool.name} on ${on_string}: mapping stats"> + <filter>save_mapping_stats is True</filter> + </data> + <data format="fastq" name="output_suppressed_reads_l" label="${tool.name} on ${on_string}: suppressed reads (L)"> + <filter>(( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sMaxFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pMaxFile'] is True + )) + </filter> + <actions> + <conditional name="singlePaired.sPaired"> + <when value="single"> + <action type="format"> + <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" /> + </action> + </when> + <when value="paired"> + <action type="format"> + <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="fastq" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)"> + <filter>singlePaired['sPaired'] == "paired"</filter> + <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter> + <filter>singlePaired['pParams']['pMaxFile'] is True</filter> + <actions> + <conditional 
name="singlePaired.sPaired"> + <when value="single"> + <action type="format"> + <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" /> + </action> + </when> + <when value="paired"> + <action type="format"> + <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="fastq" name="output_unmapped_reads_l" label="${tool.name} on ${on_string}: unmapped reads (L)"> + <filter> + (( + singlePaired['sPaired'] == "single" and + singlePaired['sParams']['sSettingsType'] == "full" and + singlePaired['sParams']['sUnmappedFile'] is True + ) or ( + singlePaired['sPaired'] == "paired" and + singlePaired['pParams']['pSettingsType'] == "full" and + singlePaired['pParams']['pUnmappedFile'] is True + )) + </filter> + <actions> + <conditional name="singlePaired.sPaired"> + <when value="single"> + <action type="format"> + <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" /> + </action> + </when> + <when value="paired"> + <action type="format"> + <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" /> + </action> + </when> + </conditional> + </actions> + </data> + <data format="fastq" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)"> + <filter>singlePaired['sPaired'] == "paired"</filter> + <filter>singlePaired['pParams']['pSettingsType'] == "full"</filter> + <filter>singlePaired['pParams']['pUnmappedFile'] is True</filter> + <actions> + <conditional name="singlePaired.sPaired"> + <when value="single"> + <action type="format"> + <option type="from_param" name="singlePaired.sInput1" param_attribute="ext" /> + </action> + </when> + <when value="paired"> + <action type="format"> + <option type="from_param" name="singlePaired.pInput1" param_attribute="ext" /> + </action> + </when> + </conditional> + </actions> + </data> + </outputs> + <tests> + <test> + <!-- + Bowtie command: + bowtie 
-q -p 4 -S +sam-nohead chrM_base test-data/bowtie_in2.fastqsanger > bowtie_out6_u.sam + sort bowtie_out6_u.sam > bowtie_out6.sam + -p is the number of threads. You need to replace the + with 2 dashes. + chrM_base needs to be the base location/name of the index files. + --> + <param name="genomeSource" value="indexed" /> + <!-- this is the backwards-compatible "unique value" for this index, not an actual path --> + <param name="index" value="equCab2chrM" /> + <param name="sPaired" value="single" /> + <param name="sInput1" ftype="fastqsanger" value="bowtie_in2.fastqsanger" /> + <param name="sSettingsType" value="preSet" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out6.sam" sort="True"> + <metadata name="dbkey" value="equCab2" /> + </output> + </test> + <test> + <!-- + Bowtie command: + bowtie-build -f test-data/phiX.fasta phiX_base + bowtie -q -X 1000 +ff -p 4 -S +sam-nohead -n 2 -e 70 -l 28 +pairtries 100 +maxbts 800 +best +un bowtie_out8_u.fastq phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out7_u.sam + sort bowtie_out7_u.sam > bowtie_out7.sam + sort bowtie_out8_u_1.fastq > bowtie_out8_1.fastq + sort bowtie_out8_u_2.fastq > bowtie_out8_2.fastq + Then also need to modify bowtie_out8_1.fastq and bowtie_out8_2.fastq so that all @ lines come before sequence lines. + -p is the number of threads. You need to replace the + with 2 dashes. + The two unmapped output files will be named bowtie_out8_1.fastq and bowtie_out8_2.fastq. + phiX_base is the index files' location/base name. 
+ --> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="phiX.fasta" /> + <param name="indexSettings" value="indexPreSet" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqsanger" value="bowtie_in5.fastqsanger" /> + <param name="pInput2" ftype="fastqsanger" value="bowtie_in6.fastqsanger" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="full" /> + <param name="pSkip" value="0" /> + <param name="pAlignLimit" value="-1" /> + <param name="pTrimH" value="0" /> + <param name="pTrimL" value="0" /> + <param name="alignMode" value="nMode" /> + <param name="pMismatchSeed" value="2" /> + <param name="pMismatchQual" value="70" /> + <param name="pSeedLen" value="28" /> + <param name="pRounding" value="round" /> + <param name="pMinInsert" value="0" /> + <param name="pMaxAlignAttempt" value="100" /> + <param name="pForwardAlign" value="forward" /> + <param name="pReverseAlign" value="reverse" /> + <param name="pTryHard" value="noTryHard" /> + <param name="pValAlign" value="1" /> + <param name="pAllValAligns" value="noAllValAligns" /> + <param name="pSuppressAlign" value="-1" /> + <param name="pUnmappedFile" value="true" /> + <param name="pMaxFile" value="false" /> + <param name="pBest" value="doBest" /> + <param name="pdMaxBacktracks" value="800" /> + <param name="pdStrata" value="noStrata" /> + <param name="pOffrate" value="-1" /> + <param name="pSeed" value="-1" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" /> + <output name="output_unmapped_reads_l" ftype="fastqsanger" file="bowtie_out8_1.fastq" sort="True" /> + <output name="output_unmapped_reads_r" ftype="fastqsanger" file="bowtie_out8_2.fastq" sort="True" /> + </test> + <!-- start testing of non-sanger variant fastq reads --> + <test> + <param name="genomeSource" value="history" /> + <param name="ownFile" 
value="phiX.fasta" /> + <param name="indexSettings" value="indexPreSet" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqillumina" value="bowtie_in5.fastqillumina" /> + <param name="pInput2" ftype="fastqillumina" value="bowtie_in6.fastqillumina" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="full" /> + <param name="pSkip" value="0" /> + <param name="pAlignLimit" value="-1" /> + <param name="pTrimH" value="0" /> + <param name="pTrimL" value="0" /> + <param name="alignMode" value="nMode" /> + <param name="pMismatchSeed" value="2" /> + <param name="pMismatchQual" value="70" /> + <param name="pSeedLen" value="28" /> + <param name="pRounding" value="round" /> + <param name="pMinInsert" value="0" /> + <param name="pMaxAlignAttempt" value="100" /> + <param name="pForwardAlign" value="forward" /> + <param name="pReverseAlign" value="reverse" /> + <param name="pTryHard" value="noTryHard" /> + <param name="pValAlign" value="1" /> + <param name="pAllValAligns" value="noAllValAligns" /> + <param name="pSuppressAlign" value="-1" /> + <param name="pUnmappedFile" value="true" /> + <param name="pMaxFile" value="false" /> + <param name="pBest" value="doBest" /> + <param name="pdMaxBacktracks" value="800" /> + <param name="pdStrata" value="noStrata" /> + <param name="pOffrate" value="-1" /> + <param name="pSeed" value="-1" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" /> + <output name="output_unmapped_reads_l" ftype="fastqillumina" file="bowtie_out8_1.fastqillumina.sorted" sort="True" /> + <output name="output_unmapped_reads_r" ftype="fastqillumina" file="bowtie_out8_2.fastqillumina.sorted" sort="True" /> + </test> + <test> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="phiX.fasta" /> + <param name="indexSettings" value="indexPreSet" /> + <param name="sPaired" 
value="paired" /> + <param name="pInput1" ftype="fastqsolexa" value="bowtie_in5.fastqsolexa" /> + <param name="pInput2" ftype="fastqsolexa" value="bowtie_in6.fastqsolexa" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="full" /> + <param name="pSkip" value="0" /> + <param name="pAlignLimit" value="-1" /> + <param name="pTrimH" value="0" /> + <param name="pTrimL" value="0" /> + <param name="alignMode" value="nMode" /> + <param name="pMismatchSeed" value="2" /> + <param name="pMismatchQual" value="70" /> + <param name="pSeedLen" value="28" /> + <param name="pRounding" value="round" /> + <param name="pMinInsert" value="0" /> + <param name="pMaxAlignAttempt" value="100" /> + <param name="pForwardAlign" value="forward" /> + <param name="pReverseAlign" value="reverse" /> + <param name="pTryHard" value="noTryHard" /> + <param name="pValAlign" value="1" /> + <param name="pAllValAligns" value="noAllValAligns" /> + <param name="pSuppressAlign" value="-1" /> + <param name="pUnmappedFile" value="true" /> + <param name="pMaxFile" value="false" /> + <param name="pBest" value="doBest" /> + <param name="pdMaxBacktracks" value="800" /> + <param name="pdStrata" value="noStrata" /> + <param name="pOffrate" value="-1" /> + <param name="pSeed" value="-1" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out7.sam" sort="True" /> + <output name="output_unmapped_reads_l" ftype="fastqsolexa" file="bowtie_out8_1.fastqsolexa.sorted" sort="True" /> + <output name="output_unmapped_reads_r" ftype="fastqsolexa" file="bowtie_out8_2.fastqsolexa.sorted" sort="True" /> + </test> + <!-- end testing of non-sanger variant fastq reads --> + <test> + <!-- + Bowtie command: + bowtie -q -p 4 -S +sam-nohead -n 2 -e 70 -l 28 -y -k 1 chrM_base test-data/bowtie_in2.fastqsanger > bowtie_out9_u.sam + sort bowtie_out9_u.sam > bowtie_out9.sam + -p is the number of threads. 
You need to replace the + with 2 dashes. + chrM_base is the index files' location/base name. + --> + <param name="genomeSource" value="indexed" /> + <!-- this is the backwards-compatible "unique value" for this index, not an actual path --> + <param name="index" value="equCab2chrM" /> + <param name="sPaired" value="single" /> + <param name="sInput1" ftype="fastqsanger" value="bowtie_in2.fastqsanger" /> + <param name="sSettingsType" value="full" /> + <param name="sSkip" value="0" /> + <param name="sAlignLimit" value="-1" /> + <param name="sTrimH" value="0" /> + <param name="sTrimL" value="0" /> + <param name="alignMode" value="nMode" /> + <param name="sMismatchSeed" value="2" /> + <param name="sMismatchQual" value="70" /> + <param name="sSeedLen" value="28" /> + <param name="sRounding" value="round" /> + <param name="sForwardAlign" value="forward" /> + <param name="sReverseAlign" value="reverse" /> + <param name="sTryHard" value="doTryHard" /> + <param name="sValAlign" value="1" /> + <param name="sAllValAligns" value="noAllValAligns" /> + <param name="sSuppressAlign" value="-1" /> + <param name="sUnmappedFile" value="false" /> + <param name="sMaxFile" value="false" /> + <param name="sBest" value="noBest" /> + <param name="sOffrate" value="-1" /> + <param name="sSeed" value="-1" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out9.sam" sort="True"> + <metadata name="dbkey" value="equCab2" /> + </output> + </test> + <test> + <!-- + Bowtie command: + bowtie-build +offrate 5 +ftabchars 10 +little -f test-data/phiX.fasta phiX_base + bowtie -q -X 1000 +ff -p 4 -S +sam-nohead phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out10_u.sam + sort bowtie_out10_u.sam > bowtie_out10.sam + -p is the number of threads. You need to replace the + with 2 dashes. + phiX_base is the index files' location/base name. 
+ --> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="phiX.fasta" /> + <param name="indexSettings" value="indexFull" /> + <param name="autoB" value="auto" /> + <param name="nodc" value="dc" /> + <param name="noref" value="ref" /> + <param name="offrate" value="5" /> + <param name="ftab" value="10" /> + <param name="ntoa" value="no" /> + <param name="endian" value="little" /> + <param name="seed" value="-1" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqsanger" value="bowtie_in5.fastqsanger" /> + <param name="pInput2" ftype="fastqsanger" value="bowtie_in6.fastqsanger" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="preSet" /> + <param name="suppressHeader" value="true" /> + <output name="output" ftype="sam" file="bowtie_out10.sam" sort="True" /> + </test> + <test> + <!-- + Bowtie command: + bowtie-build +offrate 5 +ftabchars 10 +little -f test-data/phiX.fasta phiX_base + bowtie -q -X 1000 +ff -p 4 -S +sam-nohead phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out10_u.sam + sort bowtie_out10_u.sam > bowtie_out10.sam + -p is the number of threads. You need to replace the + with 2 dashes. + phiX_base is the index files' location/base name. 
+ --> + <param name="genomeSource" value="history" /> + <param name="ownFile" value="phiX.fasta" /> + <param name="indexSettings" value="indexFull" /> + <param name="autoB" value="auto" /> + <param name="nodc" value="dc" /> + <param name="noref" value="ref" /> + <param name="offrate" value="5" /> + <param name="ftab" value="10" /> + <param name="ntoa" value="no" /> + <param name="endian" value="little" /> + <param name="seed" value="-1" /> + <param name="sPaired" value="paired" /> + <param name="pInput1" ftype="fastqsanger" value="bowtie_in5.fastqsanger" /> + <param name="pInput2" ftype="fastqsanger" value="bowtie_in6.fastqsanger" /> + <param name="pMaxInsert" value="1000" /> + <param name="pMateOrient" value="ff" /> + <param name="pSettingsType" value="preSet" /> + <param name="suppressHeader" value="true" /> + <param name="save_mapping_stats" value="true" /> + <output name="output" ftype="sam" file="bowtie_out10.sam" sort="True" /> + <output name="mapping_stats" ftype="txt" file="bowtie_out11.txt" sort="True" /> + </test> + </tests> + + <help> + +**What it does** + +Bowtie_ is a short read aligner designed to be ultrafast and memory-efficient. It is developed by Ben Langmead and Cole Trapnell. Please cite: Langmead B, Trapnell C, Pop M, Salzberg SL. Ultrafast and memory-efficient alignment of short DNA sequences to the human genome. Genome Biology 10:R25. + +.. _Bowtie: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Know what you are doing** + +.. class:: warningmark + +There is no such thing (yet) as an automated gearshift in short read mapping. It is all like stick-shift driving in San Francisco. In other words, running this tool with default parameters will probably not give you meaningful results. A way to deal with this is to **understand** the parameters by carefully reading the `documentation`__ and experimenting. Fortunately, Galaxy makes experimenting easy. + + .. 
__: http://bowtie-bio.sourceforge.net/index.shtml + +------ + +**Input formats** + +Bowtie accepts files in Sanger FASTQ format. Use the FASTQ Groomer to prepare your files. + +------ + +**A Note on Built-in Reference Genomes** + +The default variant for all genomes is "Full", defined as all primary chromosomes (or scaffolds/contigs) including mitochondrial plus associated unmapped, plasmid, and other segments. When only one version of a genome is available in this tool, it represents the default "Full" variant. Some genomes will have more than one variant available. The "Canonical Male" or sometimes simply "Canonical" variant contains the primary chromosomes for a genome. For example a human "Canonical" variant contains chr1-chr22, chrX, chrY, and chrM. The "Canonical Female" variant contains the primary chromosomes excluding chrY. + +------ + +**Outputs** + +The output is in SAM format, and has the following columns:: + + Column Description + -------- -------------------------------------------------------- + 1 QNAME Query (pair) NAME + 2 FLAG bitwise FLAG + 3 RNAME Reference sequence NAME + 4 POS 1-based leftmost POSition/coordinate of clipped sequence + 5 MAPQ MAPping Quality (Phred-scaled) + 6 CIGAR extended CIGAR string + 7 MRNM Mate Reference sequence NaMe ('=' if same as RNAME) + 8 MPOS 1-based Mate POSition + 9 ISIZE Inferred insert SIZE + 10 SEQ query SEQuence on the same strand as the reference + 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) + 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE + +The flags are as follows:: + + Flag Description + ------ ------------------------------------- + 0x0001 the read is paired in sequencing + 0x0002 the read is mapped in a proper pair + 0x0004 the query sequence itself is unmapped + 0x0008 the mate is unmapped + 0x0010 strand of the query (1 for reverse) + 0x0020 strand of the mate + 0x0040 the read is the first read in a pair + 0x0080 the read is the second read in a pair + 0x0100 the 
alignment is not primary + +It looks like this (scroll sideways to see the entire example):: + + QNAME FLAG RNAME POS MAPQ CIGAR MRNM MPOS ISIZE SEQ QUAL OPT + HWI-EAS91_1_30788AAXX:1:1:1761:343 4 * 0 0 * * 0 0 AAAAAAANNAAAAAAAAAAAAAAAAAAAAAAAAAAACNNANNGAGTNGNNNNNNNGCTTCCCACAGNNCTGG hhhhhhh;;hhhhhhhhhhh^hOhhhhghhhfhhhgh;;h;;hhhh;h;;;;;;;hhhhhhghhhh;;Phhh + HWI-EAS91_1_30788AAXX:1:1:1578:331 4 * 0 0 * * 0 0 GTATAGANNAATAAGAAAAAAAAAAATGAAGACTTTCNNANNTCTGNANNNNNNNTCTTTTTTCAGNNGTAG hhhhhhh;;hhhhhhhhhhhhhhhhhhhhhhhhhhhh;;h;;hhhh;h;;;;;;;hhhhhhhhhhh;;hhVh + +------- + +**Bowtie settings** + +All of the options have a default value. You can change any of them. Most of the options in Bowtie have been implemented here. + +------ + +**Bowtie parameter list** + +This is an exhaustive list of Bowtie options: + +For indexing (bowtie-build):: + + -a No auto behavior. Disable the default behavior where bowtie automatically + selects values for --bmax/--bmaxdivn/--dcv/--packed parameters according + to the memory available. [off] + --packed Packing. Use a packed representation for DNA strings. [auto] + --bmax INT Suffix maximum. The maximum number of suffixes allowed in a block. [auto] + --bmaxdivn INT Suffix maximum fraction. The maximum number of suffixes allowed in a block + expressed as a fraction of the length of the reference. [4] + --dcv INT Difference-cover sample. Use INT as the period for the difference-cover + sample. [1024] + --nodc No difference-cover sample. Disable the difference-cover sample. [off] + -r No reference indexes. Do not build the NAME.3.ebwt and NAME.4.ebwt portions + of the index. Used only for paired-end alignment. [off] + -o INT Offrate. How many Burrows-Wheeler rows get marked by the indexer. The + indexer will mark every 2^INT rows. The marked rows correspond to rows on + the genome. [5] + -t INT The ftab lookup table used to calculate an initial Burrows-Wheeler range + with respect to the first INT characters of the query. 
Ftab size is 4^(INT+1) + bytes. [10] + --ntoa N conversion. Convert Ns to As before building the index. Otherwise, Ns are + simply excluded from the index and Bowtie will not find alignments that + overlap them. [off] + --big Endianness. Endianness to use when serializing integers to the index file. [off] + --little Endianness. [--little] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + +For aligning (bowtie):: + + -s INT Skip. Do not align the first INT reads or pairs in the input. [off] + -u INT Align limit. Only align the first INT reads/pairs from the input. [no limit] + -5 INT High-quality trim. Trim INT bases from the high-quality (left) end of each + read before alignment. [0] + -3 INT Low-quality trim. Trim INT bases from the low-quality (right) end of each + read before alignment. [0] + -n INT Mismatch seed. Maximum number of mismatches permitted in the seed (defined + with seed length option). Can be 0, 1, 2, or 3. [2] + -e INT Mismatch quality. Maximum permitted total of quality values at mismatched + read positions. Bowtie rounds quality values to the nearest 10 and saturates + at 30. [70] + -l INT Seed length. The number of bases on the high-quality end of the read to + which the -n ceiling applies. Must be at least 5. [28] + --nomaqround Suppress Maq rounding. Values are internally rounded to the nearest 10 and + saturate at 30. This options turns off that rounding. [off] + -v INT Maq- or SOAP-like alignment policy. This option turns off the default + Maq-like alignment policy in favor of a SOAP-like one. End-to-end alignments + with at most INT mismatches. [off] + -I INT Minimum insert. The minimum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [0] + -X INT Maximum insert. The maximum insert size for valid paired-end alignments. + Does checking on untrimmed reads if -5 or -3 is used. [250] + --fr Mate orientation. 
The upstream/downstream mate orientations for a valid + paired-end alignment against the forward reference strand. [--fr] + --rf Mate orientation. [off] + --ff Mate orientation. [off] + --pairtries INT Maximum alignment attempts for paired-end data. [100] + --nofw No forward aligning. Choosing this option means that Bowtie will not attempt + to align against the forward reference strand. [off] + --norc No reverse-complement aligning. Setting this will mean that Bowtie will not + attempt to align against the reverse-complement reference strand. [off] + --un FILENAME Write all reads that could not be aligned to file [off] + --max FILENAME Write all reads with a number of valid alignments exceeding the limit + set with the -m option to file [off] + --maxbts INT Maximum backtracks. The maximum number of backtracks permitted when aligning + a read in -n 2 or -n 3 mode. [125 without --best] [800 with --best] + -y Try hard. Try as hard as possible to find valid alignments when they exist, + including paired-end alignments. [off] + --chunkmbs INT Thread memory. The number of megabytes of memory a given thread is given to + store path descriptors in --best mode. [32] + -k INT Valid alignments. The number of valid alignments per read or pair. [off] + -a All valid alignments. Choosing this means that all valid alignments per read + or pair will be reported. [off] + -m INT Suppress alignments. Suppress all alignments for a particular read or pair + if more than INT reportable alignments exist for it. [no limit] + --best Best mode. Make Bowtie guarantee that reported singleton alignments are + "best" in terms of stratum (the number of mismatches) and quality values at + mismatched position. [off] + --strata Best strata. When running in best mode, report alignments that fall into the + best stratum if there are ones falling into more than one. [off] + -o INT Offrate override. Override the offrate of the index with INT. 
Some row + markings are discarded when the index is read into memory. INT must be greater than + the value used to build the index (default: 5). [off] + --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] + --snpphred INT Use INT as the SNP penalty for decoding colorspace alignments. Set to the estimated ratio of + SNPs per base in the subject genome, on the Phred quality scale. [see --snpfrac] + --snpfrac DEC Use DEC as the estimated ratio of SNPs per base when decoding colorspace + alignments. [0.001] + --col-keepends Keep the extreme-end nucleotides and qualities when decoding colorspace + alignments. [off] + + </help> + <citations> + <citation type="doi">10.1186/gb-2009-10-3-r25</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie_genome_wrapper/tool-data/bowtie_indices.loc.sample Tue Mar 22 12:28:40 2022 +0000 @@ -0,0 +1,37 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a directory of Bowtie-indexed sequence data files. You will +#need to create these data files and then create a bowtie_indices.loc +#file similar to this one (store it in this directory) that points to +#the directories in which those files are stored. The bowtie_indices.loc +#file has this format (longer white space characters are TAB characters): +# +#<unique_build_id> <dbkey> <display_name> <file_base_path> +# +#So, for example, if you had hg18 indexes stored in +#/depot/data2/galaxy/bowtie/hg18/, +#then the bowtie_indices.loc entry would look like this: +# +#hg18 hg18 hg18 /depot/data2/galaxy/bowtie/hg18/hg18 +# +#and your /depot/data2/galaxy/bowtie/hg18/ directory +#would contain hg18.*.ebwt files: +# +#-rw-r--r-- 1 james universe 830134 2005-09-13 10:12 hg18.1.ebwt +#-rw-r--r-- 1 james universe 527388 2005-09-13 10:12 hg18.2.ebwt +#-rw-r--r-- 1 james universe 269808 2005-09-13 10:12 hg18.3.ebwt +#...etc... +# +#Your bowtie_indices.loc file should include an entry per line for each +#index set you have stored. The "file" in the path does not actually +#exist, but it is the prefix for the actual index files. For example: +# +#hg18canon hg18 hg18 Canonical /depot/data2/galaxy/bowtie/hg18/hg18canon +#hg18full hg18 hg18 Full /depot/data2/galaxy/bowtie/hg18/hg18full +#/orig/path/hg19 hg19 hg19 /depot/data2/galaxy/bowtie/hg19/hg19 +#...etc... +# +#Note that for backwards compatibility with workflows, the unique ID of +#an entry must be the path that was in the original loc file, because that +#is the value stored in the workflow for that parameter. That is why the +#hg19 entry above looks odd. New genomes can be better-looking. +#
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bowtie_genome_wrapper/tool_data_table_conf.xml.sample Tue Mar 22 12:28:40 2022 +0000 @@ -0,0 +1,8 @@ +<!-- Use the file tool_data_table_conf.xml.oldlocstyle if you don't want to update your loc files as changed in revision 4550:535d276c92bc--> +<tables> + <!-- Locations of indexes in the Bowtie mapper format --> + <table name="bowtie_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/bowtie_indices.loc" /> + </table> +</tables>