# HG changeset patch
# User peterjc
# Date 1486052251 18000
# Node ID ea68a1a4c1d94bf6579918a316f02cdf3f505d2f
# Parent 26e35d5133a1dfb6645f148d4efc9ec6857571ce
v0.0.10 explicit galaxy_sequence_utils dependency etc
diff -r 26e35d5133a1 -r ea68a1a4c1d9 tools/venn_list/README.rst
--- a/tools/venn_list/README.rst Sat Oct 10 08:52:01 2015 -0400
+++ b/tools/venn_list/README.rst Thu Feb 02 11:17:31 2017 -0500
@@ -1,7 +1,7 @@
Galaxy tool to draw a Venn Diagram with up to 3 sets
====================================================
-This tool is copyright 2011-2015 by Peter Cock, The James Hutton Institute
+This tool is copyright 2011-2017 by Peter Cock, The James Hutton Institute
(formerly SCRI, Scottish Crop Research Institute), UK. All rights reserved.
See the licence text below.
@@ -72,6 +72,9 @@
- Includes testing of failure mode.
- Planemo for Tool Shed upload (``.shed.yml``, internal change only).
- Tool Shed dependency for rpy and limma (thanks to Björn Grüning).
+v0.0.10 - Updated to point at Biopython 1.67 (latest version in Tool Shed).
+ - Explicit dependency on ``galaxy_sequence_utils``.
+ - Python style updates (internal change only).
======= ======================================================================
diff -r 26e35d5133a1 -r ea68a1a4c1d9 tools/venn_list/tool_dependencies.xml
--- a/tools/venn_list/tool_dependencies.xml Sat Oct 10 08:52:01 2015 -0400
+++ b/tools/venn_list/tool_dependencies.xml Thu Feb 02 11:17:31 2017 -0500
@@ -1,12 +1,15 @@
+
+
+
-
-
+
+
diff -r 26e35d5133a1 -r ea68a1a4c1d9 tools/venn_list/venn_list.py
--- a/tools/venn_list/venn_list.py Sat Oct 10 08:52:01 2015 -0400
+++ b/tools/venn_list/venn_list.py Thu Feb 02 11:17:31 2017 -0500
@@ -11,127 +11,124 @@
import sys
-def sys_exit(msg, error_level=1):
- """Print error message to stdout and quit with given error level."""
- sys.stderr.write("%s\n" % msg)
- sys.exit(error_level)
-
try:
import rpy
except ImportError:
- sys_exit("Requires the Python library rpy (to call R)")
+ sys.exit("Requires the Python library rpy (to call R)")
except RuntimeError, e:
- sys_exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
+ sys.exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
try:
rpy.r.library("limma")
-except:
- sys_exit("Requires the R library limma (for vennDiagram function)")
+except Exception:
+ sys.exit("Requires the R library limma (for vennDiagram function)")
-if len(sys.argv)-1 not in [7, 10, 13]:
- sys_exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
+if len(sys.argv) - 1 not in [7, 10, 13]:
+ sys.exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv) - 1))
all_file, all_type, all_label = sys.argv[1:4]
set_data = []
-if len(sys.argv)-1 >= 7:
+if len(sys.argv) - 1 >= 7:
set_data.append(tuple(sys.argv[4:7]))
-if len(sys.argv)-1 >= 10:
+if len(sys.argv) - 1 >= 10:
set_data.append(tuple(sys.argv[7:10]))
-if len(sys.argv)-1 >= 13:
+if len(sys.argv) - 1 >= 13:
set_data.append(tuple(sys.argv[10:13]))
pdf_file = sys.argv[-1]
n = len(set_data)
print "Doing %i-way Venn Diagram" % n
+
def load_ids(filename, filetype):
- if filetype=="tabular":
+ if filetype == "tabular":
for line in open(filename):
line = line.rstrip("\n")
if line and not line.startswith("#"):
- yield line.split("\t",1)[0]
- elif filetype=="fasta":
+ yield line.split("\t", 1)[0]
+ elif filetype == "fasta":
for line in open(filename):
if line.startswith(">"):
- yield line[1:].rstrip("\n").split(None,1)[0]
+ yield line[1:].rstrip("\n").split(None, 1)[0]
elif filetype.startswith("fastq"):
- #Use the Galaxy library not Biopython to cope with CS
+ # Use the Galaxy library not Biopython to cope with CS
from galaxy_utils.sequence.fastq import fastqReader
handle = open(filename, "rU")
for record in fastqReader(handle):
- #The [1:] is because the fastaReader leaves the @ on the identifer.
+ # The [1:] is because the fastqReader leaves the @ on the identifier.
yield record.identifier.split()[0][1:]
handle.close()
- elif filetype=="sff":
+ elif filetype == "sff":
try:
from Bio.SeqIO import index
except ImportError:
- sys_exit("Require Biopython 1.54 or later (to read SFF files)")
- #This will read the SFF index block if present (very fast)
+ sys.exit("Require Biopython 1.54 or later (to read SFF files)")
+ # This will read the SFF index block if present (very fast)
for name in index(filename, "sff"):
yield name
else:
- sys_exit("Unexpected file type %s" % filetype)
+ sys.exit("Unexpected file type %s" % filetype)
+
def load_ids_whitelist(filename, filetype, whitelist):
for name in load_ids(filename, filetype):
if name in whitelist:
yield name
else:
- sys_exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
+ sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
if all_file in ["", "-", '""', '"-"']:
- #Load without white list
- sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
- #Take union
- all = set()
+ # Load without white list
+ sets = [set(load_ids(f, t)) for (f, t, c) in set_data]
+ # Take union
+ all_ids = set()
for s in sets:
- all.update(s)
- print "Inferred total of %i IDs" % len(all)
+ all_ids.update(s)
+ print "Inferred total of %i IDs" % len(all_ids)
else:
- all = set(load_ids(all_file, all_type))
- print "Total of %i IDs" % len(all)
- sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
+ all_ids = set(load_ids(all_file, all_type))
+ print "Total of %i IDs" % len(all_ids)
+ sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data]
-for s, (f,t,c) in zip(sets, set_data):
+for s, (f, t, c) in zip(sets, set_data):
print "%i in %s" % (len(s), c)
-#Now call R library to draw simple Venn diagram
+# Now call R library to draw simple Venn diagram
try:
- #Create dummy Venn diagram counts object for three groups
- cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
- rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
+ # Create dummy Venn diagram counts object for three groups
+ cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n))
+ rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n))
rpy.r('colnames(groups) <- %s' % cols)
rpy.r('vc <- vennCounts(groups)')
- #Populate the 2^n classes with real counts
- #Don't make any assumptions about the class order
- #print rpy.r('vc')
+ # Populate the 2^n classes with real counts
+ # Don't make any assumptions about the class order
+ # print rpy.r('vc')
for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
if isinstance(row, int) or isinstance(row, float):
- #Hack for rpy being too clever for single element row
+ # Hack for rpy being too clever for single element row
row = [row]
- names = all
+ names = all_ids
for wanted, s in zip(row, sets):
if wanted:
names = names.intersection(s)
else:
names = names.difference(s)
- rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
- #print rpy.r('vc')
+ rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names)))
+ # print rpy.r('vc')
if n == 1:
- #Single circle, don't need to add (Total XXX) line
- names = [c for (t,f,c) in set_data]
+ # Single circle, don't need to add (Total XXX) line
+ names = [c for (t, f, c) in set_data]
else:
- names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
+ names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)]
rpy.r.assign("names", names)
- rpy.r.assign("colors", ["red","green","blue"][:n])
+ rpy.r.assign("colors", ["red", "green", "blue"][:n])
rpy.r.pdf(pdf_file, 8, 8)
rpy.r("""vennDiagram(vc, include="both", names=names,
main="%s", sub="(Total %i)",
circle.col=colors)
- """ % (all_label, len(all)))
+ """ % (all_label, len(all_ids)))
rpy.r.dev_off()
except Exception, exc:
- sys_exit( "%s" %str( exc ) )
-rpy.r.quit( save="no" )
+ sys.exit("%s" % str(exc))
+rpy.r.quit(save="no")
print "Done"
diff -r 26e35d5133a1 -r ea68a1a4c1d9 tools/venn_list/venn_list.xml
--- a/tools/venn_list/venn_list.xml Sat Oct 10 08:52:01 2015 -0400
+++ b/tools/venn_list/venn_list.xml Thu Feb 02 11:17:31 2017 -0500
@@ -1,157 +1,158 @@
-
- from lists
-
- rpy
- Bio
- rpy
- limma
- biopython
-
-
-
-
-
-
-
-venn_list.py
-#if $universe.type_select=="implicit":
- - -
-#else:
- "$main" $main.ext
-#end if
-"$main_lab"
-#for $s in $sets:
- "$s.set" $s.set.ext "$s.lab"
-#end for
-$PDF
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-.. class:: infomark
-
-**TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
-
-**What it does**
-
-Draws Venn Diagram for one, two or three sets (as a PDF file).
-
-You must supply one, two or three sets of identifiers -- corresponding
-to one, two or three circles on the Venn Diagram.
-
-In general you should also give the full list of all the identifiers
-explicitly. This is used to calculate the number of identifers outside
-the circles (and check the identifiers in the other files match up).
-The full list can be omitted by implicitly taking the union of the
-category sets. In this case, the count outside the categories (circles)
-will always be zero.
-
-The identifiers can be taken from the first column of a tabular file
-(e.g. query names in BLAST tabular output, or signal peptide predictions
-after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
-
-For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
-file), and the results of several different read mappings (e.g. to
-different references) as tabular files (filtered to have just the mapped
-reads). You could then show the different mappings (and their overlaps)
-as a Venn Diagram, and the outside count would be the unmapped reads.
-
-**Citations**
-
-The Venn Diagrams are drawn using Gordon Smyth's limma package from
-R/Bioconductor, http://www.bioconductor.org/
-
-The R library is called from Python via rpy, http://rpy.sourceforge.net/
-
-If you use this Galaxy tool in work leading to a scientific publication please
-cite:
-
-Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
-Galaxy tools and workflows for sequence analysis with applications
-in molecular plant pathology. PeerJ 1:e167
-http://dx.doi.org/10.7717/peerj.167
-
-This tool uses Biopython to read and write SFF files, so you may also wish to
-cite the Biopython application note (and Galaxy too of course):
-
-Cock et al 2009. Biopython: freely available Python tools for computational
-molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
-http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
-
-
-
- 10.7717/peerj.167
- 10.1093/bioinformatics/15.5.356
-
-
+
+ from lists
+
+ galaxy_sequence_utils
+ rpy
+ Bio
+ rpy
+ limma
+ biopython
+
+
+
+
+
+
+
+venn_list.py
+#if $universe.type_select=="implicit":
+ - -
+#else:
+ "$main" $main.ext
+#end if
+"$main_lab"
+#for $s in $sets:
+ "$s.set" $s.set.ext "$s.lab"
+#end for
+$PDF
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+.. class:: infomark
+
+**TIP:** If your data is in tabular files, the identifier is assumed to be in column one.
+
+**What it does**
+
+Draws Venn Diagram for one, two or three sets (as a PDF file).
+
+You must supply one, two or three sets of identifiers -- corresponding
+to one, two or three circles on the Venn Diagram.
+
+In general you should also give the full list of all the identifiers
+explicitly. This is used to calculate the number of identifiers outside
+the circles (and check the identifiers in the other files match up).
+The full list can be omitted by implicitly taking the union of the
+category sets. In this case, the count outside the categories (circles)
+will always be zero.
+
+The identifiers can be taken from the first column of a tabular file
+(e.g. query names in BLAST tabular output, or signal peptide predictions
+after filtering, etc), or from a sequence file (FASTA, FASTQ, SFF).
+
+For example, you may have a set of NGS reads (as a FASTA, FASTQ or SFF
+file), and the results of several different read mappings (e.g. to
+different references) as tabular files (filtered to have just the mapped
+reads). You could then show the different mappings (and their overlaps)
+as a Venn Diagram, and the outside count would be the unmapped reads.
+
+**Citations**
+
+The Venn Diagrams are drawn using Gordon Smyth's limma package from
+R/Bioconductor, http://www.bioconductor.org/
+
+The R library is called from Python via rpy, http://rpy.sourceforge.net/
+
+If you use this Galaxy tool in work leading to a scientific publication please
+cite:
+
+Peter J.A. Cock, Björn A. Grüning, Konrad Paszkiewicz and Leighton Pritchard (2013).
+Galaxy tools and workflows for sequence analysis with applications
+in molecular plant pathology. PeerJ 1:e167
+http://dx.doi.org/10.7717/peerj.167
+
+This tool uses Biopython to read SFF files, so you may also wish to
+cite the Biopython application note (and Galaxy too of course):
+
+Cock et al 2009. Biopython: freely available Python tools for computational
+molecular biology and bioinformatics. Bioinformatics 25(11) 1422-3.
+http://dx.doi.org/10.1093/bioinformatics/btp163 pmid:19304878.
+
+
+
+ 10.7717/peerj.167
+ 10.1093/bioinformatics/15.5.356
+
+