Mercurial > repos > bgruening > bismark
changeset 4:243e8f9fb75b draft
Uploaded
author | bgruening |
---|---|
date | Mon, 09 Feb 2015 18:24:41 -0500 |
parents | 91f07ff056ca |
children | b100248c35b8 |
files | bismark_bowtie2_wrapper.xml bismark_bowtie_wrapper.xml bismark_methylation_extractor.xml bismark_wrapper.py tool_dependencies.xml |
diffstat | 5 files changed, 118 insertions(+), 79 deletions(-) [+] |
line wrap: on
line diff
--- a/bismark_bowtie2_wrapper.xml Mon Apr 14 16:43:14 2014 -0400 +++ b/bismark_bowtie2_wrapper.xml Mon Feb 09 18:24:41 2015 -0500 @@ -7,14 +7,20 @@ <requirement type="package" version="0.1.19">samtools</requirement> <requirement type="package" version="2.1.0">bowtie2</requirement> </requirements> - <parallelism method="basic"></parallelism> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> <command interpreter="python"> +<![CDATA[ bismark_wrapper.py - + ## Change this to accommodate the number of threads you have available. --num-threads "\${GALAXY_SLOTS:-24}" - --bismark_path \$SCRIPT_PATH + ##--bismark_path \$SCRIPT_PATH --bowtie2 @@ -34,7 +40,6 @@ ## Input parameters ## - #if $singlePaired.sPaired == "single": --single-paired $singlePaired.input_singles @@ -58,14 +63,17 @@ --mate1 #echo ','.join($mate1) --mate2 #echo ','.join($mate2) - #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina": - --phred64-quals - --fastq - #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger": - --fastq - #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta": - --fasta - #end if + #for $mate_pair in $singlePaired.mate_list: + #if $mate_pair.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $mate_pair.input_mate1.ext == "fastqsanger": + --fastq + #elif $mate_pair.input_mate1.ext == "fasta": + --fasta + #end if + #break + #end for -I $singlePaired.minInsert -X $singlePaired.maxInsert @@ -89,7 +97,7 @@ --seed-extention-attempts $params.seed_extention_attempts ## default 2 --max-reseed $params.max_reseed - + ## default 70 ##--maqerr $params.maqerr @@ -140,6 +148,7 @@ #end if #end if +]]> </command> <inputs> <conditional name="refGenomeSource"> @@ -213,7 +222,7 @@ <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" /> <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" /> - <!--end output options --> + <!--end output options --> </when> <!-- full --> </conditional> <!-- params --> <!-- @@ -275,18 +284,22 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> </data> <data format="fastq" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)"> - <filter>singlePaired['sPaired'] == "paired"</filter> - <filter>params['settingsType'] == "custom"</filter> - <filter>params['supressed_read_file'] is True</filter> + <filter> + (( + singlePaired['sPaired'] == "paired" and + params['settingsType'] == "custom" and + params['suppressed_read_file'] is True + )) + </filter> <actions> <conditional name="singlePaired.sPaired"> <when value="single"> @@ -295,9 +308,9 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> @@ -319,18 +332,22 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> </data> <data format="fastq" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)"> - <filter>singlePaired['sPaired'] == "paired"</filter> - <filter>params['settingsType'] == "custom"</filter> - <filter>params['unmapped_read_file'] is True</filter> + <filter> + (( + singlePaired['sPaired'] == "paired" and + params['settingsType'] == "custom" and + params['unmapped_read_file'] is True + )) + </filter> <actions> <conditional name="singlePaired.sPaired"> <when value="single"> @@ -339,9 +356,9 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> @@ -352,6 +369,7 @@ </tests> <help> +<![CDATA[ **What it does** @@ -409,8 +427,8 @@ Column Description -------- -------------------------------------------------------- 1 QNAME seq-ID - 2 FLAG this flag tries to take the strand a bisulfite read - originated from into account + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account (this is different from ordinary DNA alignment flags!) 3 RNAME chromosome 4 POS start position @@ -422,12 +440,12 @@ 10 SEQ query SEQuence on the same strand as the reference 11 QUAL Phred33 scale 12 NM-tag edit distance to the reference) - 13 XX-tag base-by-base mismatches to the reference. + 13 XX-tag base-by-base mismatches to the reference. This does not include indels. 14 XM-tag methylation call string 15 XR-tag read conversion state for the alignment 16 XG-tag genome conversion state for the alignment - + Each read of paired-end alignments is written out in a separate line in the above format. @@ -484,7 +502,7 @@ --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled - (which can't). The formula for conversion is: + (which can't). The formula for conversion is: phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This is usually the right option for use with (unconverted) reads emitted by the GA Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. @@ -496,7 +514,7 @@ Alignment:: -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs - of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to @@ -634,11 +652,15 @@ Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever happens first. Only the best alignment is reported. Information from the other alignments is used to - estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes + estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not guarantee that the alignment reported is the best possible in terms of alignment score. -M is always used and its default value is set to 10. +]]> </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btr167</citation> + </citations> </tool>
--- a/bismark_bowtie_wrapper.xml Mon Apr 14 16:43:14 2014 -0400 +++ b/bismark_bowtie_wrapper.xml Mon Feb 09 18:24:41 2015 -0500 @@ -7,11 +7,17 @@ <requirement type="package" version="0.1.19">samtools</requirement> <requirement type="package" version="0.12.8">bowtie</requirement> </requirements> - <parallelism method="basic"></parallelism> + <stdio> + <exit_code range="1:" /> + <exit_code range=":-1" /> + <regex match="Error:" /> + <regex match="Exception:" /> + </stdio> <command interpreter="python"> +<![CDATA[ bismark_wrapper.py - --bismark_path \$SCRIPT_PATH + ##--bismark_path \$SCRIPT_PATH ## ## Bismark Genome Preparation, if desired. @@ -53,14 +59,17 @@ --mate1 #echo ','.join($mate1) --mate2 #echo ','.join($mate2) - #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina": - --phred64-quals - --fastq - #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger": - --fastq - #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta": - --fasta - #end if + #for $mate_pair in $singlePaired.mate_list: + #if $mate_pair.input_mate1.ext == "fastqillumina": + --phred64-quals + --fastq + #elif $mate_pair.input_mate1.ext == "fastqsanger": + --fastq + #elif $mate_pair.input_mate1.ext == "fasta": + --fasta + #end if + #break + #end for -I $singlePaired.minInsert -X $singlePaired.maxInsert @@ -123,6 +132,7 @@ #end if #end if +]]> </command> <inputs> <conditional name="refGenomeSource"> @@ -191,7 +201,7 @@ <!-- output Options --> <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" /> <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" /> - <!--end output options --> + <!--end output options --> </when> <!-- full --> </conditional> <!-- params --> <!-- @@ -251,9 +261,9 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> @@ -271,9 +281,9 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> @@ -295,9 +305,9 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> @@ -314,21 +324,20 @@ </action> </when> <when value="paired"> - <action type="format"> + <!--action type="format"> <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" /> - </action> + </action--> </when> </conditional> </actions> </data> - - </outputs> <tests> </tests> <help> +<![CDATA[ **What it does** @@ -386,8 +395,8 @@ Column Description -------- -------------------------------------------------------- 1 QNAME seq-ID - 2 FLAG this flag tries to take the strand a bisulfite read - originated from into account + 2 FLAG this flag tries to take the strand a bisulfite read + originated from into account (this is different from ordinary DNA alignment flags!) 3 RNAME chromosome 4 POS start position @@ -399,12 +408,12 @@ 10 SEQ query SEQuence on the same strand as the reference 11 QUAL Phred33 scale 12 NM-tag edit distance to the reference) - 13 XX-tag base-by-base mismatches to the reference. + 13 XX-tag base-by-base mismatches to the reference. This does not include indels. 14 XM-tag methylation call string 15 XR-tag read conversion state for the alignment 16 XG-tag genome conversion state for the alignment - + Each read of paired-end alignments is written out in a separate line in the above format. @@ -461,7 +470,7 @@ --phred64-quals FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off. --solexa-quals Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled - (which can't). The formula for conversion is: + (which can't). The formula for conversion is: phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This is usually the right option for use with (unconverted) reads emitted by the GA Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off. @@ -473,7 +482,7 @@ Alignment:: -n/--seedmms INT The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs - of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the + of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N). -l/--seedlen The "seed length"; i.e., the number of bases of the high quality end of the read to @@ -546,5 +555,9 @@ the specified folder does not exist, Bismark will attempt to create it first. The path to the temporary folder can be either relative or absolute. +]]> </help> + <citations> + <citation type="doi">10.1093/bioinformatics/btr167</citation> + </citations> </tool>
--- a/bismark_methylation_extractor.xml Mon Apr 14 16:43:14 2014 -0400 +++ b/bismark_methylation_extractor.xml Mon Feb 09 18:24:41 2015 -0500 @@ -9,11 +9,12 @@ </requirements> <parallelism method="basic"></parallelism> <command interpreter="python"> +<![CDATA[ bismark_methylation_extractor.py --infile $input - --bismark_path \$SCRIPT_PATH + #--bismark_path \$SCRIPT_PATH #if $singlePaired.sPaired == "single": --single-end @@ -78,6 +79,7 @@ ## end compress #end if +]]> </command> <inputs> <!-- Input Parameters --> @@ -187,11 +189,12 @@ </tests> <help> +<![CDATA[ **What it does** The following is a brief description of all options to control the Bismark_ -methylation extractor. The script reads in a bisulfite read alignment results file +methylation extractor. The script reads in a bisulfite read alignment results file produced by the Bismark bisulfite mapper and extracts the methylation information for individual cytosines. This information is found in the methylation call field which can contain the following characters: @@ -285,8 +288,8 @@ Output:: - --comprehensive Specifying this option will merge all four possible strand-specific - methylation info into context-dependent output files. The default + --comprehensive Specifying this option will merge all four possible strand-specific + methylation info into context-dependent output files. The default contexts are: - CpG context - CHG context @@ -301,5 +304,6 @@ this script. +]]> </help> </tool>
--- a/bismark_wrapper.py Mon Apr 14 16:43:14 2014 -0400 +++ b/bismark_wrapper.py Mon Feb 09 18:24:41 2015 -0500 @@ -17,7 +17,6 @@ def __main__(): - print 'tempfile_location',tempfile.gettempdir() #Parse Command Line parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.') parser.add_argument( '-p', '--num-threads', dest='num_threads', @@ -258,9 +257,6 @@ # Final bismark command: cmd = cmd % arguments - print 'bismark_cmd:', cmd - #sys.stderr.write( cmd ) - #sys.exit(1) # Run try: tmp_out = tempfile.NamedTemporaryFile().name @@ -321,8 +317,8 @@ """ #tmp_out = tempfile.NamedTemporaryFile( dir=output_dir ).name tmp_stdout = open( tmp_out, 'wab' ) - tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name - tmp_stderr = open( tmp_err, 'wb' ) + #tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name + tmp_stderr = open( tmp_err, 'wab' ) tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name @@ -338,18 +334,22 @@ if returncode != 0: raise Exception, open( tmp_stderr.name ).read() else: - tmp_res = bam_files[0] + tmp_res = bam_files[0] bam_path = "%s" % tmp_res if os.path.exists( bam_path ): - if args.sort_bam: - cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output) - else: - shutil.copy( bam_path, args.output ) + if args.sort_bam: + cmd = 'samtools sort -@ %s %s sorted_bam' % (args.num_threads, bam_path) + proc = subprocess.Popen( args=shlex.split( cmd ) ) + returncode = proc.wait() + if returncode != 0: + raise Exception("Error during '%s'" % cmd) + shutil.move( 'sorted_bam.bam', args.output ) + else: + shutil.move( bam_path, args.output ) else: stop_err( 'BAM file no found:\n' + str( bam_path ) ) - # TODO: look for errors in program output.
--- a/tool_dependencies.xml Mon Apr 14 16:43:14 2014 -0400 +++ b/tool_dependencies.xml Mon Feb 09 18:24:41 2015 -0500 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <tool_dependency> <package name="samtools" version="0.1.19"> - <repository changeset_revision="00e17a794a2e" name="package_samtools_0_1_19" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <set_environment version="1.0"> <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>