Mercurial > repos > peterjc > fastq_paired_unpaired
changeset 6:f396701fbf32 draft
v0.1.3 Depends on Biopython 1.67 via Tool Shed package or bioconda.
author | peterjc |
---|---|
date | Wed, 10 May 2017 13:28:59 -0400 |
parents | b38bbcbd458d |
children | 2709a0f065c9 |
files | tools/fastq_paired_unpaired/README.rst tools/fastq_paired_unpaired/fastq_paired_unpaired.py tools/fastq_paired_unpaired/fastq_paired_unpaired.xml tools/fastq_paired_unpaired/tool_dependencies.xml |
diffstat | 4 files changed, 37 insertions(+), 35 deletions(-) [+] |
line wrap: on
line diff
--- a/tools/fastq_paired_unpaired/README.rst Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/README.rst Wed May 10 13:28:59 2017 -0400 @@ -1,7 +1,7 @@ Galaxy tool to divide FASTQ files into paired and unpaired reads ================================================================ -This tool is copyright 2010-2015 by Peter Cock, The James Hutton Institute +This tool is copyright 2010-2017 by Peter Cock, The James Hutton Institute (formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved. See the licence text below (MIT licence). @@ -71,6 +71,9 @@ - Use ``format_source=...`` tag. - Planemo for Tool Shed upload (``.shed.yml``, internal change only). v0.1.2 - Belatedly declare Biopython dependency via Tool Shed. +v0.1.3 - Minor internal changes to Python script for error reporting & style. + - Updated to point at Biopython 1.67 (latest version in Tool Shed). + - Explicit dependency on ``galaxy_sequence_utils``. ======= ====================================================================== @@ -88,17 +91,17 @@ Planemo commands (which requires you have set your Tool Shed access details in ``~/.planemo.yml`` and that you have access rights on the Tool Shed):: - $ planemo shed_update --shed_target testtoolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_update -t testtoolshed --check_diff tools/fastq_paired_unpaired/ ... or:: - $ planemo shed_update --shed_target toolshed --check_diff ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_update -t toolshed --check_diff tools/fastq_paired_unpaired/ ... To just build and check the tar ball, use:: - $ planemo shed_upload --tar_only ~/repositories/pico_galaxy/tools/fastq_paired_unpaired/ + $ planemo shed_upload --tar_only tools/fastq_paired_unpaired/ ... $ tar -tzf shed_upload.tar.gz test-data/sanger-pairs-forward.fastq
--- a/tools/fastq_paired_unpaired/fastq_paired_unpaired.py Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.py Wed May 10 13:28:59 2017 -0400 @@ -14,22 +14,18 @@ See accompanying text file for licence details (MIT license). """ -import os + +import re import sys -import re if "-v" in sys.argv or "--version" in sys.argv: - print("Version 0.1.0") + print("Version 0.1.3") sys.exit(0) -def sys_exit(msg, err=1): - sys.stderr.write(msg.rstrip() + "\n") - sys.exit(err) - try: from Bio.SeqIO.QualityIO import FastqGeneralIterator except ImportError: - sys_exit("Biopython missing") + sys.exit("Biopython missing") msg = """Expect either 3 or 4 arguments, all FASTQ filenames. @@ -58,7 +54,7 @@ same identifier with the fragment at the start of the description, e.g. @HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 1:N:0:TGNCCA -@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA +@HWI-ST916:79:D04M5ACXX:1:1101:10000:100326 2:N:0:TGNCCA Note that this does support multiple forward and reverse reads per template (which is quite common with Sanger sequencing), e.g. this which is sorted @@ -83,28 +79,28 @@ """ if len(sys.argv) == 5: - format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:] + seq_format, input_fastq, pairs_fastq, singles_fastq = sys.argv[1:] elif len(sys.argv) == 6: pairs_fastq = None - format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:] + seq_format, input_fastq, pairs_f_fastq, pairs_r_fastq, singles_fastq = sys.argv[1:] else: - sys_exit(msg) + sys.exit(msg) -format = format.replace("fastq", "").lower() -if not format: - format="sanger" #safe default -elif format not in ["sanger","solexa","illumina","cssanger"]: - sys_exit("Unrecognised format %s" % format) +seq_format = seq_format.replace("fastq", "").lower() +if not seq_format: + seq_format = "sanger" # safe default +elif seq_format not in ["sanger", "solexa", "illumina", "cssanger"]: + sys.exit("Unrecognised format %s" % seq_format) -#Cope with three widely used suffix naming convensions, -#Illumina: /1 or /2 -#Forward/revered: .f or .r -#Sanger, e.g. .p1k and .q1k -#See http://staden.sourceforge.net/manual/pregap4_unix_50.html +# Cope with three widely used suffix naming convensions, +# Illumina: /1 or /2 +# Forward/revered: .f or .r +# Sanger, e.g. .p1k and .q1k +# See http://staden.sourceforge.net/manual/pregap4_unix_50.html re_f = re.compile(r"(/1|\.f|\.[sfp]\d\w*)$") re_r = re.compile(r"(/2|\.r|\.[rq]\d\w*)$") -#assert re_f.match("demo/1") +# assert re_f.match("demo/1") assert re_f.search("demo.f") assert re_f.search("demo.s1") assert re_f.search("demo.f1k") @@ -144,7 +140,7 @@ for title, seq, qual in FastqGeneralIterator(in_handle): count += 1 - name = title.split(None,1)[0] + name = title.split(None, 1)[0] is_forward = False suffix = re_f.search(name) if suffix: @@ -220,7 +216,7 @@ for old in buffered_reads: singles_handle.write(FASTQ_TEMPLATE % old) singles += 1 -in_handle.close +in_handle.close() singles_handle.close() if pairs_fastq: pairs_f_handle.close() @@ -238,4 +234,4 @@ assert count == pairs + singles == forward + reverse + neither, \ "%i vs %i+%i=%i vs %i+%i+%i=%i" \ - % (count,pairs,singles,pairs+singles,forward,reverse,neither,forward+reverse+neither) + % (count, pairs, singles, pairs + singles, forward, reverse, neither, forward + reverse + neither)
--- a/tools/fastq_paired_unpaired/fastq_paired_unpaired.xml Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/fastq_paired_unpaired.xml Wed May 10 13:28:59 2017 -0400 @@ -1,8 +1,8 @@ -<tool id="fastq_paired_unpaired" name="Divide FASTQ file into paired and unpaired reads" version="0.1.2"> +<tool id="fastq_paired_unpaired" name="Divide FASTQ file into paired and unpaired reads" version="0.1.3"> <description>using the read name suffices</description> <requirements> - <requirement type="package" version="1.65">biopython</requirement> - <requirement type="python-module">Bio</requirement> + <requirement type="package" version="1.0.1">galaxy_sequence_utils</requirement> + <requirement type="package" version="1.67">biopython</requirement> </requirements> <stdio> <!-- Anything other than zero is an error -->
--- a/tools/fastq_paired_unpaired/tool_dependencies.xml Wed Aug 05 11:17:49 2015 -0400 +++ b/tools/fastq_paired_unpaired/tool_dependencies.xml Wed May 10 13:28:59 2017 -0400 @@ -1,6 +1,9 @@ <?xml version="1.0"?> <tool_dependency> - <package name="biopython" version="1.65"> - <repository changeset_revision="dc595937617c" name="package_biopython_1_65" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> + <package name="galaxy_sequence_utils" version="1.0.1"> + <repository changeset_revision="c1ab450748ba" name="package_galaxy_sequence_utils_1_0_1" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" /> + </package> + <package name="biopython" version="1.67"> + <repository changeset_revision="a42f244cce44" name="package_biopython_1_67" owner="biopython" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> </tool_dependency>