# HG changeset patch
# User peterjc
# Date 1337358312 14400
# Node ID 50a8a6917a9c4a0562670397127c6a9fc59ac620
# Parent 838b9bebfa3c475287f1b2c78d5ae6bd32dfe74c
Uploaded update (v0.0.3) to ignore blank lines in the ID file
diff -r 838b9bebfa3c -r 50a8a6917a9c tools/filters/seq_select_by_id.py
--- a/tools/filters/seq_select_by_id.py Tue Jun 07 17:43:38 2011 -0400
+++ b/tools/filters/seq_select_by_id.py Fri May 18 12:25:12 2012 -0400
@@ -16,11 +16,11 @@
molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
-This script is copyright 2011 by Peter Cock, The James Hutton Institute UK.
+This script is copyright 2011-2012 by Peter Cock, The James Hutton Institute UK.
All rights reserved. See accompanying text file for licence details (MIT/BSD
style).
-This is version 0.0.1 of the script.
+This is version 0.0.3 of the script.
"""
import sys
@@ -39,7 +39,7 @@
else:
column = int(col_arg)-1
except ValueError:
- stop_err("Expected column number, got %s" % cols_arg)
+ stop_err("Expected column number, got %s" % col_arg)
if seq_format == "fastqcssanger":
stop_err("Colorspace FASTQ not supported.")
@@ -65,7 +65,7 @@
"""Read tabular file and record all specified identifiers."""
handle = open(tabular_file, "rU")
for line in handle:
- if not line.startswith("#"):
+ if line.strip() and not line.startswith("#"):
yield line.rstrip("\n").split("\t")[col].strip()
handle.close()
@@ -105,7 +105,7 @@
except KeyError, err:
out_handle.close()
if name not in records:
- stop_err("Identifier %s not found in sequence file" % name)
+ stop_err("Identifier %r not found in sequence file" % name)
else:
raise err
out_handle.close()
@@ -119,7 +119,7 @@
out_handle.write(records.get_raw(name))
except KeyError:
out_handle.close()
- stop_err("Identifier %s not found in sequence file" % name)
+ stop_err("Identifier %r not found in sequence file" % name)
count += 1
out_handle.close()
diff -r 838b9bebfa3c -r 50a8a6917a9c tools/filters/seq_select_by_id.txt
--- a/tools/filters/seq_select_by_id.txt Tue Jun 07 17:43:38 2011 -0400
+++ b/tools/filters/seq_select_by_id.txt Fri May 18 12:25:12 2012 -0400
@@ -1,5 +1,5 @@
-Galaxy tool to select FASTA, FASTQ or SFF sequences by ID
-=========================================================
+Galaxy tool to select FASTA, QUAL, FASTQ or SFF sequences by ID
+===============================================================
This tool is copyright 2011 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
@@ -26,7 +26,7 @@
You will also need to modify the tools_conf.xml file to tell Galaxy to offer the
tool. One suggested location is in the filters section. Simply add the line:
-
+
You will also need to install Biopython 1.54 or later. That's it.
@@ -35,7 +35,7 @@
=======
v0.0.1 - Initial version.
-
+v0.0.3 - Ignore blank lines in the input ID file.
Developers
==========
diff -r 838b9bebfa3c -r 50a8a6917a9c tools/filters/seq_select_by_id.xml
--- a/tools/filters/seq_select_by_id.xml Tue Jun 07 17:43:38 2011 -0400
+++ b/tools/filters/seq_select_by_id.xml Fri May 18 12:25:12 2012 -0400
@@ -1,4 +1,4 @@
-
+
from a tabular file
seq_select_by_id.py $input_tabular $column $input_file $input_file.ext $output_file