Mercurial > repos > peterjc > count_roi_variants
changeset 1:3ee6f4d0ac80 draft
v0.0.5 Fix version inconsistency, using samtools v1.2 now.
author | peterjc |
---|---|
date | Tue, 16 May 2017 09:14:18 -0400 |
parents | 95efbdb72961 |
children | 167765a633c0 |
files | tools/count_roi_variants/README.rst tools/count_roi_variants/count_roi_variants.py tools/count_roi_variants/count_roi_variants.xml |
diffstat | 3 files changed, 48 insertions(+), 21 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/count_roi_variants/README.rst Wed Feb 01 07:10:26 2017 -0500 +++ b/tools/count_roi_variants/README.rst Tue May 16 09:14:18 2017 -0400 @@ -80,6 +80,9 @@ v0.0.2 - Cope with pipes in reference name (e.g. NCBI style FASTA naming) v0.0.3 - Include a functional test for using an unrecognised reference. v0.0.4 - Improved usage text and README for use outside of Galaxy. +v0.0.5 - Fix samtools dependency version inconsistency, using v1.2 now. + - Use ``<command detect_errors="aggressive">`` (internal change only). + - Single quote command line arguments (internal change only). ======= ====================================================================== @@ -93,17 +96,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/count_roi_variants/ + $ planemo shed_update -t testtoolshed --check_diff tools/count_roi_variants/ ... or:: - $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/count_roi_variants/ + $ planemo shed_update -t toolshed --check_diff tools/count_roi_variants/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/count_roi_variants/ + $ planemo shed_upload --tar_only tools/count_roi_variants/ ... $ tar -tzf shed_upload.tar.gz tools/count_roi_variants/README.rst
--- a/tools/count_roi_variants/count_roi_variants.py Wed Feb 01 07:10:26 2017 -0500 +++ b/tools/count_roi_variants/count_roi_variants.py Tue May 16 09:14:18 2017 -0400 @@ -12,9 +12,10 @@ the observed variants spanning the ROI, and outputs this as a tabular file. """ -import sys + import os import subprocess +import sys import tempfile if "-v" in sys.argv or "--version" in sys.argv: @@ -97,7 +98,9 @@ return answer -assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [(14, 'S'), (15, 'M'), (1, 'P'), (1, 'D'), (3, 'P'), (54, 'M'), (1, 'D'), (34, 'M'), (5, 'S')] +assert decode_cigar("14S15M1P1D3P54M1D34M5S") == [(14, 'S'), (15, 'M'), (1, 'P'), + (1, 'D'), (3, 'P'), (54, 'M'), + (1, 'D'), (34, 'M'), (5, 'S')] def align_len(cigar_ops): @@ -138,16 +141,32 @@ else: raise NotImplementedError("Unexpected CIGAR operator %s" % op) + assert list(expand_cigar("ACGT", decode_cigar("4M"))) == [(0, "A"), (1, "C"), (2, "G"), (3, "T")] assert list(expand_cigar("ACGT", decode_cigar("2=1X1="))) == [(0, "A"), (1, "C"), (2, "G"), (3, "T")] assert list(expand_cigar("ACGT", decode_cigar("2M1D2M"))) == [(0, "A"), (1, "C"), (3, "G"), (4, "T")] assert list(expand_cigar("ACtGT", decode_cigar("2M1I2M"))) == [(0, "A"), (1, "C"), (1.5, "t"), (2, "G"), (3, "T")] assert list(expand_cigar("tACGT", decode_cigar("1I4M"))) == [(-0.5, 't'), (0, 'A'), (1, 'C'), (2, 'G'), (3, 'T')] assert list(expand_cigar("ACGTt", decode_cigar("4M1I"))) == [(0, 'A'), (1, 'C'), (2, 'G'), (3, 'T'), (3.5, 't')] -assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] -assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (3.5, 'c'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] -assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (7.5, "c"), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] -assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), (3, 'A'), (3.5, 'c'), (4, 'G'), (5, 'G'), (6, 'G'), (7, 'G'), (7.5, 'c'), (8, 'T'), (9, 'T'), (10, 'T'), (11, 'T')] +assert list(expand_cigar("AAAAGGGGTTTT", decode_cigar("12M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), + (3, 'A'), (4, 'G'), (5, 'G'), + (6, 'G'), (7, 'G'), (8, 'T'), + (9, 'T'), (10, 'T'), (11, 'T')] +assert list(expand_cigar("AAAAcGGGGTTTT", decode_cigar("4M1I8M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), + (3, 'A'), (3.5, 'c'), (4, 'G'), + (5, 'G'), (6, 'G'), (7, 'G'), + (8, 'T'), (9, 'T'), (10, 'T'), + (11, 'T')] +assert list(expand_cigar("AAAAGGGGcTTTT", decode_cigar("8M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), + (3, 'A'), (4, 'G'), (5, 'G'), + (6, 'G'), (7, 'G'), (7.5, "c"), + (8, 'T'), (9, 'T'), (10, 'T'), + (11, 'T')] +assert list(expand_cigar("AAAAcGGGGcTTTT", decode_cigar("4M1I4M1I4M"))) == [(0, 'A'), (1, 'A'), (2, 'A'), + (3, 'A'), (3.5, 'c'), (4, 'G'), + (5, 'G'), (6, 'G'), (7, 'G'), + (7.5, 'c'), (8, 'T'), (9, 'T'), + (10, 'T'), (11, 'T')] def get_roi(seq, cigar_ops, start, end): @@ -167,6 +186,7 @@ # want to exclude e.g. 3.5 and 7.5 when given start 4 and end 8. return "".join(base for i, base in expand_cigar(seq, cigar_ops) if start <= i <= end - 1) + assert "GGGG" == get_roi("AAAAGGGGTTTT", decode_cigar("12M"), 4, 8) assert "GGGG" == get_roi("AAAAcGGGGTTTT", decode_cigar("4M1I8M"), 4, 8) assert "GGGG" == get_roi("AAAAGGGGcTTTT", decode_cigar("8M1I4M"), 4, 8) @@ -176,6 +196,10 @@ def count_region(): + """Count reads mapped to the region. + + Calls samtools view and parses the output. + """ # Could recreate the region string (with no commas in start/end)? # region = "%s:%i-%i" % (ref, start, end) @@ -211,13 +235,15 @@ if return_code: sys.exit("Got return code %i from samtools view" % return_code) elif "specifies an unknown reference name. Continue anyway." in stderr: - sys.exit(stderr.strip() + "\n\nERROR: samtools did not recognise the region requested, can't count any variants.") + sys.exit(stderr.strip() + + "\n\nERROR: samtools did not recognise the region requested, " + "can't count any variants.") return tally def record_counts(): - + """Record counts to tabular file.""" tally = count_region() total = sum(tally.values())
--- a/tools/count_roi_variants/count_roi_variants.xml Wed Feb 01 07:10:26 2017 -0500 +++ b/tools/count_roi_variants/count_roi_variants.xml Tue May 16 09:14:18 2017 -0400 @@ -1,16 +1,14 @@ -<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.3"> +<tool id="count_roi_variants" name="Count sequence variants in region of interest" version="0.0.5"> <description>using samtools view</description> <requirements> - <requirement type="binary">samtools</requirement> - <requirement type="package" version="1.2.2">samtools</requirement> + <requirement type="package" version="1.2">samtools</requirement> </requirements> - <stdio> - <!-- Assume anything other than zero is an error --> - <exit_code range="1:" /> - <exit_code range=":-1" /> - </stdio> - <version_command interpreter="python">count_roi_variants.py --version</version_command> - <command interpreter="python">count_roi_variants.py "$input_bam" "${input_bam.metadata.bam_index}" "$out_tabular" "$region"</command> + <version_command> +python $__tool_directory__/count_roi_variants.py --version + </version_command> + <command detect_errors="aggressive"> +python $__tool_directory__/count_roi_variants.py '$input_bam' '${input_bam.metadata.bam_index}' '$out_tabular' '$region' + </command> <inputs> <param name="input_bam" type="data" format="bam" label="Input BAM file" /> <param name="region" type="text" label="Region of interest" help="Use the reference:start-end syntax as in samtools.">