Mercurial > repos > peterjc > sample_seqs
changeset 6:31f5701cd2e9 draft
v0.2.4 Depends on Biopython 1.67 via legacy Tool Shed package or bioconda.
author | peterjc |
---|---|
date | Thu, 11 May 2017 07:24:38 -0400 |
parents | 6b71ad5d43fb |
children | 86710edcec02 |
files | tools/sample_seqs/README.rst tools/sample_seqs/sample_seqs.py tools/sample_seqs/sample_seqs.xml tools/sample_seqs/tool_dependencies.xml |
diffstat | 4 files changed, 18 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/sample_seqs/README.rst Wed Feb 01 09:39:36 2017 -0500 +++ b/tools/sample_seqs/README.rst Thu May 11 07:24:38 2017 -0400 @@ -1,7 +1,7 @@ Galaxy tool to sub-sample sequence files ======================================== -This tool is copyright 2014-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2014-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -71,6 +71,8 @@ - Planemo for Tool Shed upload (``.shed.yml``, internal change only). v0.2.3 - Do the Biopython imports at the script start (internal change only). - Clarify paired read example in help text. +v0.2.4 - Depends on Biopython 1.67 via legacy Tool Shed package or bioconda. + - Style changes to Python code (internal change only). ======= ====================================================================== @@ -84,17 +86,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update -t testtoolshed --check_diff ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_update -t testtoolshed --check_diff tools/sample_seqs/ ... or:: - $ planemo shed_update -t toolshed --check_diff ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_update -t toolshed --check_diff tools/sample_seqs/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/sample_seqs/ + $ planemo shed_upload --tar_only tools/sample_seqs/ ... $ tar -tzf shed_upload.tar.gz test-data/MID4_GLZRM4E04_rnd30_frclip.pair_sample_N5.sff
--- a/tools/sample_seqs/sample_seqs.py Wed Feb 01 09:39:36 2017 -0500
+++ b/tools/sample_seqs/sample_seqs.py Thu May 11 07:24:38 2017 -0400
@@ -63,7 +63,7 @@
 options, args = parser.parse_args()
 
 if options.version:
-    print("v0.2.3")
+    print("v0.2.4")
     sys.exit(0)
 
 try:
@@ -146,6 +146,7 @@
     sys.stderr.write("Sampling every %ith sequence\n" % N)
 
     def sampler(iterator):
+        """Sample every Nth sequence."""
         global N
         count = 0
         for record in iterator:
@@ -157,11 +158,12 @@
         percent = float(options.percent) / 100.0
     except ValueError:
         sys.exit("Bad -p percent argument %r" % options.percent)
-    if percent <= 0.0 or 1.0 <= percent:
+    if not(0.0 <= percent <= 1.0):
         sys.exit("Bad -p percent argument %r" % options.percent)
     sys.stderr.write("Sampling %0.3f%% of sequences\n" % (100.0 * percent))
 
     def sampler(iterator):
+        """Sample given percentage of sequences."""
         global percent
         count = 0
         taken = 0
@@ -215,6 +217,7 @@
             assert taken == N, "Picked %i, wanted %i" % (taken, N)
     else:
         def sampler(iterator):
+            """Sample given number of sequences."""
             # Mimic the percentage sampler, with double check on final count
             global N, total
             # Do we need a floating point fudge factor epsilon?
@@ -268,12 +271,11 @@
             raise ValueError(
                 "Records in Fasta files should start with '>' character")
         try:
-            id = line[1:].split(None, 1)[0]
+            line[1:].split(None, 1)[0]
         except IndexError:
             if not no_id_warned:
                 sys.stderr.write("WARNING - Malformed FASTA entry with no identifier\n")
-            no_id_warned = True
-            id = None
+                no_id_warned = True
         lines = [line]
         line = handle.readline()
         while True:
@@ -346,6 +348,7 @@
             count = writer.write_file(iterator_filter(SffIterator(in_handle)))
     return count
 
+
 if seq_format == "sff":
     count = sff_filter(in_file, out_file, sampler, interleaved)
 elif seq_format == "fasta":
--- a/tools/sample_seqs/sample_seqs.xml Wed Feb 01 09:39:36 2017 -0500
+++ b/tools/sample_seqs/sample_seqs.xml Thu May 11 07:24:38 2017 -0400
@@ -1,8 +1,7 @@
-<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.3">
+<tool id="sample_seqs" name="Sub-sample sequences files" version="0.2.4">
     <description>e.g. to reduce coverage</description>
     <requirements>
-        <requirement type="package" version="1.65">biopython</requirement>
-        <requirement type="python-module">Bio</requirement>
+        <requirement type="package" version="1.67">biopython</requirement>
     </requirements>
     <stdio>
         <!-- Anything other than zero is an error -->
--- a/tools/sample_seqs/tool_dependencies.xml Wed Feb 01 09:39:36 2017 -0500
+++ b/tools/sample_seqs/tool_dependencies.xml Thu May 11 07:24:38 2017 -0400
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <tool_dependency>
-    <package name="biopython" version="1.65">
-        <repository changeset_revision="d8185f5631ed" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
+    <package name="biopython" version="1.67">
+        <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
 </tool_dependency>