# HG changeset patch # User devteam # Date 1437498285 14400 # Node ID df86f29bedee53c1b6853f73c91d0703a1b2efe6 # Parent 9ca609a2a42147d90bb6baf2de5e41dbee87e4eb planemo upload commit 00a7926c285bc4a339bd7deebf40b28f39c7d947-dirty diff -r 9ca609a2a421 -r df86f29bedee bowtie_wrapper.py --- a/bowtie_wrapper.py Fri Feb 21 12:40:39 2014 -0500 +++ b/bowtie_wrapper.py Tue Jul 21 13:04:45 2015 -0400 @@ -27,10 +27,10 @@ -M, --mismatchQual=M: Maximum permitted total of quality values at mismatched read positions -l, --seedLen=l: Seed length -n, --rounding=n: Whether or not to round to the nearest 10 and saturating at 30 - -P, --maqSoapAlign=P: Choose MAQ- or SOAP-like alignment policy + -P, --maxMismatches=P: Maximum number of mismatches for -v alignment mode -w, --tryHard=: Whether or not to try as hard as possible to find valid alignments when they exist - -v, --valAlign=v: Report up to n valid arguments per read - -V, --allValAligns=V: Whether or not to report all valid alignments per read + -V, --allValAligns=V: Whether or not to report all valid alignments per read or pair + -v, --valAlign=v: Report up to n valid alignments per read or pair -G, --suppressAlign=G: Suppress all alignments for a read if more than n reportable alignments exist -b, --best=b: Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions -B, --maxBacktracks=B: Maximum number of backtracks permitted when aligning a read @@ -59,7 +59,6 @@ -X, --intoa=X: Whether or not to convert Ns in the reference sequence to As -N, --iendian=N: Endianness to use when serializing integers to the index file -Z, --iseed=Z: Seed for the pseudorandom number generator - -c, --icutoff=c: Number of first bases of the reference sequence to index -x, --indexSettings=x: Whether or not indexing options are to be set -H, --suppressHeader=H: Suppress header --do_not_build_index: Flag to specify that provided file is already indexed and to just use 'as is' @@ -101,10 +100,10 @@ parser.add_option( '-M', '--mismatchQual', dest='mismatchQual', help='Maximum permitted total of quality values at mismatched read positions' ) parser.add_option( '-l', '--seedLen', dest='seedLen', help='Seed length' ) parser.add_option( '-n', '--rounding', dest='rounding', help='Whether or not to round to the nearest 10 and saturating at 30' ) - parser.add_option( '-P', '--maqSoapAlign', dest='maqSoapAlign', help='Choose MAQ- or SOAP-like alignment policy' ) + parser.add_option( '-P', '--maxMismatches', dest='maxMismatches', help='Maximum number of mismatches for -v alignment mode' ) parser.add_option( '-w', '--tryHard', dest='tryHard', help='Whether or not to try as hard as possible to find valid alignments when they exist' ) - parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid arguments per read' ) - parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read' ) + parser.add_option( '-V', '--allValAligns', dest='allValAligns', help='Whether or not to report all valid alignments per read or pair' ) + parser.add_option( '-v', '--valAlign', dest='valAlign', help='Report up to n valid alignments per read or pair' ) parser.add_option( '-G', '--suppressAlign', dest='suppressAlign', help='Suppress all alignments for a read if more than n reportable alignments exist' ) parser.add_option( '-b', '--best', dest='best', help="Whether or not to make Bowtie guarantee that reported singleton alignments are 'best' in terms of stratum and in terms of the quality values at the mismatched positions" ) parser.add_option( '-B', '--maxBacktracks', dest='maxBacktracks', help='Maximum number of backtracks permitted when aligning a read' ) @@ -133,12 +132,13 @@ parser.add_option( '-X', '--intoa', dest='intoa', help='Whether or not to convert Ns in the reference sequence to As' ) parser.add_option( '-N', '--iendian', dest='iendian', help='Endianness to use when serializing integers to the index file' ) parser.add_option( '-Z', '--iseed', dest='iseed', help='Seed for the pseudorandom number generator' ) - parser.add_option( '-c', '--icutoff', dest='icutoff', help='Number of first bases of the reference sequence to index' ) parser.add_option( '-x', '--indexSettings', dest='index_settings', help='Whether or not indexing options are to be set' ) parser.add_option( '-H', '--suppressHeader', dest='suppressHeader', help='Suppress header' ) parser.add_option( '--galaxy_input_format', dest='galaxy_input_format', default="fastqsanger", help='galaxy input format' ) parser.add_option( '--do_not_build_index', dest='do_not_build_index', action="store_true", default=False, help='Flag to specify that provided file is already indexed, use as is' ) (options, args) = parser.parse_args() + if options.mismatchSeed and options.maxMismatches: + parser.error("options --mismatchSeed and --maxMismatches are mutually exclusive") stdout = '' # make temp directory for placement of indices and copy reference file there if necessary @@ -159,19 +159,19 @@ iautoB = '--noauto' else: iautoB = '' - if options. ipacked and options.ipacked == 'packed': + if options.ipacked and options.ipacked == 'packed': ipacked = '--packed' else: ipacked = '' if options.ibmax and int( options.ibmax ) >= 1: - ibmax = '--bmax %s' % options.ibmax + ibmax = '--bmax %s' % options.ibmax else: ibmax = '' if options.ibmaxdivn and int( options.ibmaxdivn ) >= 0: ibmaxdivn = '--bmaxdivn %s' % options.ibmaxdivn else: ibmaxdivn = '' - if options.idcv and int( options.idcv ) > 0: + if options.idcv and int( options.idcv ) >= 3: idcv = '--dcv %s' % options.idcv else: idcv = '' @@ -183,7 +183,7 @@ inoref = '--noref' else: inoref = '' - if options.iftab and int( options.iftab ) >= 0: + if options.iftab and int( options.iftab ) >= 1: iftab = '--ftabchars %s' % options.iftab else: iftab = '' @@ -199,14 +199,10 @@ iseed = '--seed %s' % options.iseed else: iseed = '' - if options.icutoff and int( options.icutoff ) > 0: - icutoff = '--cutoff %s' % options.icutoff - else: - icutoff = '' - indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s %s' % \ - ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, - inoref, options.ioffrate, iftab, intoa, iendian, - iseed, icutoff, colorspace ) + indexing_cmds = '%s %s %s %s %s %s %s --offrate %s %s %s %s %s %s' % \ + ( iautoB, ipacked, ibmax, ibmaxdivn, idcv, inodc, + inoref, options.ioffrate, iftab, intoa, iendian, + iseed, colorspace ) except ValueError, e: # clean up temp dir if os.path.exists( tmp_index_dir ): @@ -283,16 +279,17 @@ trimL = '-3 %s' % options.trimL else: trimL = '' - if options.maqSoapAlign != '-1' and int( options.maqSoapAlign ) >= 0: - maqSoapAlign = '-v %s' % options.maqSoapAlign + if options.maxMismatches and (options.maxMismatches == '0' or options.maxMismatches == '1' \ + or options.maxMismatches == '2' or options.maxMismatches == '3'): + maxMismatches = '-v %s' % options.maxMismatches else: - maqSoapAlign = '' + maxMismatches = '' if options.mismatchSeed and (options.mismatchSeed == '0' or options.mismatchSeed == '1' \ or options.mismatchSeed == '2' or options.mismatchSeed == '3'): mismatchSeed = '-n %s' % options.mismatchSeed else: mismatchSeed = '' - if options.mismatchQual and int( options.mismatchQual ) >= 0: + if options.mismatchQual and int( options.mismatchQual ) >= 1: mismatchQual = '-e %s' % options.mismatchQual else: mismatchQual = '' @@ -395,8 +392,8 @@ aligning_cmds = '-q %s %s -p %s -S %s %s %s %s %s %s %s %s %s %s %s %s ' \ '%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s ' % \ ( maxInsert, mateOrient, options.threads, suppressHeader, - colorspace, skip, alignLimit, trimH, trimL, maqSoapAlign, - mismatchSeed, mismatchQual, seedLen, rounding, minInsert, + colorspace, skip, alignLimit, trimH, trimL, maxMismatches, + mismatchSeed, mismatchQual, seedLen, rounding, minInsert, maxAlignAttempt, forwardAlign, reverseAlign, maxBacktracks, tryHard, valAlign, allValAligns, suppressAlign, best, strata, offrate, seed, snpphred, snpfrac, keepends, @@ -466,4 +463,5 @@ stdout += 'Sequence file aligned.\n' sys.stdout.write( stdout ) -if __name__=="__main__": __main__() +if __name__ == "__main__": + __main__() diff -r 9ca609a2a421 -r df86f29bedee bowtie_wrapper.xml --- a/bowtie_wrapper.xml Fri Feb 21 12:40:39 2014 -0500 +++ b/bowtie_wrapper.xml Tue Jul 21 13:04:45 2015 -0400 @@ -1,9 +1,9 @@ - + - bowtie + bowtie - + bowtie --version bowtie_wrapper.py ## Set number of threads @@ -58,7 +58,6 @@ --intoa="${refGenomeSource.indexParams.ntoa}" --iendian="${refGenomeSource.indexParams.endian}" --iseed="${refGenomeSource.indexParams.seed}" - --icutoff="${refGenomeSource.indexParams.cutoff}" #end if #end if #else @@ -74,21 +73,32 @@ --alignLimit="${singlePaired.sParams.sAlignLimit}" --trimH="${singlePaired.sParams.sTrimH}" --trimL="${singlePaired.sParams.sTrimL}" - --mismatchSeed="${singlePaired.sParams.sMismatchSeed}" - --mismatchQual="${singlePaired.sParams.sMismatchQual}" - --seedLen="${singlePaired.sParams.sSeedLen}" - --rounding="${singlePaired.sParams.sRounding}" - --maqSoapAlign="${singlePaired.sParams.sMaqSoapAlign}" - --tryHard="${singlePaired.sParams.sTryHard}" - --valAlign="${singlePaired.sParams.sValAlign}" - --allValAligns="${singlePaired.sParams.sAllValAligns}" + #if $singlePaired.sParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.sParams.alignModeOption.sMismatchSeed}" + --mismatchQual="${singlePaired.sParams.alignModeOption.sMismatchQual}" + --seedLen="${singlePaired.sParams.alignModeOption.sSeedLen}" + --rounding="${singlePaired.sParams.alignModeOption.sRounding}" + #else + --maxMismatches="${singlePaired.sParams.alignModeOption.maxMismatches}" + #end if + --forwardAlign="${singlePaired.sParams.sForwardAlign}" + --reverseAlign="${singlePaired.sParams.sReverseAlign}" + --tryHard="${singlePaired.sParams.sBestOption.sTryHardOption.sTryHard}" + --allValAligns="${singlePaired.sParams.sAllValAlignsOption.sAllValAligns}" + #if $singlePaired.sParams.sAllValAlignsOption.sAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.sParams.sAllValAlignsOption.sValAlign}" + #end if --suppressAlign="${singlePaired.sParams.sSuppressAlign}" --best="${singlePaired.sParams.sBestOption.sBest}" #if $singlePaired.sParams.sBestOption.sBest == "doBest": - --maxBacktracks="${singlePaired.sParams.sBestOption.sdMaxBacktracks}" --strata="${singlePaired.sParams.sBestOption.sdStrata}" + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.sdMaxBacktracks}" + #end if #else: - --maxBacktracks="${singlePaired.sParams.sBestOption.snMaxBacktracks}" + #if $singlePaired.sParams.sBestOption.sTryHardOption.sTryHard == "noTryHard" + --maxBacktracks="${singlePaired.sParams.sBestOption.sTryHardOption.snMaxBacktracks}" + #end if #end if --offrate="${singlePaired.sParams.sOffrate}" --seed="${singlePaired.sParams.sSeed}" @@ -104,25 +114,35 @@ --alignLimit="${singlePaired.pParams.pAlignLimit}" --trimH="${singlePaired.pParams.pTrimH}" --trimL="${singlePaired.pParams.pTrimL}" - --mismatchSeed="${singlePaired.pParams.pMismatchSeed}" - --mismatchQual="${singlePaired.pParams.pMismatchQual}" - --seedLen="${singlePaired.pParams.pSeedLen}" - --rounding="${singlePaired.pParams.pRounding}" - --maqSoapAlign="${singlePaired.pParams.pMaqSoapAlign}" + #if $singlePaired.pParams.alignModeOption.alignMode == 'nMode' + --mismatchSeed="${singlePaired.pParams.alignModeOption.pMismatchSeed}" + --mismatchQual="${singlePaired.pParams.alignModeOption.pMismatchQual}" + --seedLen="${singlePaired.pParams.alignModeOption.pSeedLen}" + --rounding="${singlePaired.pParams.alignModeOption.pRounding}" + #else + --maxMismatches="${singlePaired.pParams.alignModeOption.maxMismatches}" + #end if --minInsert="${singlePaired.pParams.pMinInsert}" - --maxAlignAttempt="${singlePaired.pParams.pMaxAlignAttempt}" --forwardAlign="${singlePaired.pParams.pForwardAlign}" --reverseAlign="${singlePaired.pParams.pReverseAlign}" - --tryHard="${singlePaired.pParams.pTryHard}" - --valAlign="${singlePaired.pParams.pValAlign}" - --allValAligns="${singlePaired.pParams.pAllValAligns}" + --tryHard="${singlePaired.pParams.pBestOption.pTryHardOption.pTryHard}" + --allValAligns="${singlePaired.pParams.pAllValAlignsOption.pAllValAligns}" + #if $singlePaired.pParams.pAllValAlignsOption.pAllValAligns == "noAllValAligns" + --valAlign="${singlePaired.pParams.pAllValAlignsOption.pValAlign}" + #end if --suppressAlign="${singlePaired.pParams.pSuppressAlign}" --best="${singlePaired.pParams.pBestOption.pBest}" #if $singlePaired.pParams.pBestOption.pBest == "doBest": - --maxBacktracks="${singlePaired.pParams.pBestOption.pdMaxBacktracks}" --strata="${singlePaired.pParams.pBestOption.pdStrata}" + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pdMaxBacktracks}" + #end if #else: - --maxBacktracks="${singlePaired.pParams.pBestOption.pnMaxBacktracks}" + #if $singlePaired.pParams.pBestOption.pTryHardOption.pTryHard == "noTryHard" + --maxAlignAttempt="${singlePaired.pParams.pBestOption.pTryHardOption.pMaxAlignAttempt}" + --maxBacktracks="${singlePaired.pParams.pBestOption.pTryHardOption.pnMaxBacktracks}" + #end if #end if --offrate="${singlePaired.pParams.pOffrate}" --seed="${singlePaired.pParams.pSeed}" @@ -149,7 +169,7 @@ - + @@ -159,13 +179,13 @@ - + - + @@ -176,8 +196,8 @@ - - + + @@ -185,9 +205,8 @@ - + - @@ -203,49 +222,86 @@ - + - - + + - - - - - - + + + + + + + + + + + + + + + + + + + + + - - - - + + + - - - - - - - - - + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + @@ -257,7 +313,7 @@ > - + @@ -273,63 +329,93 @@ - - + + - - - - - - - - + + + + + + + + + + + + + + + + + + - - + - + - - - - - - - - - - - - - + + + + + + + + + + + - + + + + + + + + + + + + + + + + + + + + + + + + - + @@ -449,7 +535,7 @@ Bowtie command: bowtie -q -p 4 -S +sam-nohead chrM_base test-data/bowtie_in2.fastqsanger > bowtie_out6_u.sam sort bowtie_out6_u.sam > bowtie_out6.sam - -p is the number of threads. You need to replace the + with 2 dashes. + -p is the number of threads. You need to replace the + with 2 dashes. chrM_base needs to be the base location/name of the index files. --> @@ -472,7 +558,7 @@ Then also need to modify bowtie_out8_1.sam and bowtie_out8_2.sam so that all @ lines come before sequence lines. -p is the number of threads. You need to replace the + with 2 dashes. The two unmapped output files will be named bowtie_out8_1.fastq and bowtie_out8_2.fastq. - chrM_base is the index files' location/base name. + chrM_base is the index files' location/base name. --> @@ -487,11 +573,11 @@ + - @@ -527,11 +613,11 @@ + - @@ -549,8 +635,8 @@ - - + + @@ -566,11 +652,11 @@ + - @@ -591,14 +677,14 @@ - + @@ -610,11 +696,13 @@ + - + + @@ -622,7 +710,6 @@ - @@ -635,7 +722,7 @@ bowtie -q -X 1000 +ff -p 4 -S +sam-nohead phiX_base -1 test-data/bowtie_in5.fastqsanger -2 test-data/bowtie_in6.fastqsanger > bowtie_out10_u.sam sort bowtie_out10_u.sam > bowtie_out10.sam -p is the number of threads. You need to replace the + with 2 dashes. - chrM_base is the index files' location/base name. + chrM_base is the index files' location/base name. --> @@ -648,7 +735,6 @@ - @@ -697,7 +783,7 @@ The output is in SAM format, and has the following columns:: Column Description - -------- -------------------------------------------------------- + -------- -------------------------------------------------------- 1 QNAME Query (pair) NAME 2 FLAG bitwise FLAG 3 RNAME Reference sequence NAME @@ -710,7 +796,7 @@ 10 SEQ query SEQuence on the same strand as the reference 11 QUAL query QUALity (ASCII-33 gives the Phred base quality) 12 OPT variable OPTional fields in the format TAG:VTYPE:VALUE - + The flags are as follows:: Flag Description @@ -760,8 +846,8 @@ -o Offrate. How many Burrows-Wheeler rows get marked by the indexer. The indexer will mark every 2^INT rows. The marked rows correspond to rows on the genome. [5] - -t INT Ftab. The lookup table used to calculate an initial Burrows-Wheeler range - with respect to the first INT characters of the query. Ftab is 4^INT+1 + -t INT The ftab lookup table used to calculate an initial Burrows-Wheeler range + with respect to the first INT characters of the query. Ftab size is 4^(INT+1) bytes. [10] --ntoa N conversion. Convert Ns to As before building the index. Otherwise, Ns are simply excluded from the index and Bowtie will not find alignments that @@ -769,8 +855,6 @@ --big Endianness. Endianness to use when serializing integers to the index file. [off] --little Endianness. [--little] --seed INT Random seed. Use INT as the seed for the pseudo-random number generator. [off] - --cutoff INT Cutoff. Index only the first INT bases of the reference sequences (cumulative - across sequences) and ignore the rest. [off] For aligning (bowtie):: @@ -787,10 +871,10 @@ at 30. [70] -l INT Seed length. The number of bases on the high-quality end of the read to which the -n ceiling applies. Must be at least 5. [28] - --nomaqround Suppress MAQ rounding. Values are internally rounded to the nearest 10 and + --nomaqround Suppress Maq rounding. Values are internally rounded to the nearest 10 and saturate at 30. This options turns off that rounding. [off] - -v INT MAQ- or SOAP-like alignment policy. This option turns off the default - MAQ-like alignment policy in favor of a SOAP-like one. End-to-end alignments + -v INT Maq- or SOAP-like alignment policy. This option turns off the default + Maq-like alignment policy in favor of a SOAP-like one. End-to-end alignments with at most INT mismatches. [off] -I INT Minimum insert. The minimum insert size for valid paired-end alignments. Does checking on untrimmed reads if -5 or -3 is used. [0] @@ -835,5 +919,8 @@ --col-keepends Keep the extreme-end nucleotides and qualities when decoding colorspace alignments. [off] - + + + 10.1186/gb-2009-10-3-r25 + diff -r 9ca609a2a421 -r df86f29bedee tool_dependencies.xml --- a/tool_dependencies.xml Fri Feb 21 12:40:39 2014 -0500 +++ b/tool_dependencies.xml Tue Jul 21 13:04:45 2015 -0400 @@ -1,6 +1,6 @@ - +