venn_list: tools/venn_list/venn

comparison tools/venn_list/venn_list.py @ 6:ea68a1a4c1d9 draft

v0.0.10 explicit galaxy_sequence_utils dependency etc

author	peterjc
date	Thu, 02 Feb 2017 11:17:31 -0500
parents	991342eca214
children	ee50d9ef9d69

comparison

equal deleted inserted replaced

-:26e35d5133a1
+:ea68a1a4c1d9
 """
 import sys
-def sys_exit(msg, error_level=1):
-"""Print error message to stdout and quit with given error level."""
-sys.stderr.write("%s\n" % msg)
-sys.exit(error_level)
 try:
 import rpy
 except ImportError:
-sys_exit("Requires the Python library rpy (to call R)")
+sys.exit("Requires the Python library rpy (to call R)")
 except RuntimeError, e:
-sys_exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
+sys.exit("The Python library rpy is not availble for the current R version\n\n%s" % e)
 try:
 rpy.r.library("limma")
-except:
+except Exception:
-sys_exit("Requires the R library limma (for vennDiagram function)")
+sys.exit("Requires the R library limma (for vennDiagram function)")
-if len(sys.argv)-1 not in [7, 10, 13]:
+if len(sys.argv) - 1 not in [7, 10, 13]:
-sys_exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv)-1))
+sys.exit("Expected 7, 10 or 13 arguments (for 1, 2 or 3 sets), not %i" % (len(sys.argv) - 1))
 all_file, all_type, all_label = sys.argv[1:4]
 set_data = []
-if len(sys.argv)-1 >= 7:
+if len(sys.argv) - 1 >= 7:
 set_data.append(tuple(sys.argv[4:7]))
-if len(sys.argv)-1 >= 10:
+if len(sys.argv) - 1 >= 10:
 set_data.append(tuple(sys.argv[7:10]))
-if len(sys.argv)-1 >= 13:
+if len(sys.argv) - 1 >= 13:
 set_data.append(tuple(sys.argv[10:13]))
 pdf_file = sys.argv[-1]
 n = len(set_data)
 print "Doing %i-way Venn Diagram" % n
 def load_ids(filename, filetype):
-if filetype=="tabular":
+if filetype == "tabular":
 for line in open(filename):
 line = line.rstrip("\n")
 if line and not line.startswith("#"):
-yield line.split("\t",1)[0]
+yield line.split("\t", 1)[0]
-elif filetype=="fasta":
+elif filetype == "fasta":
 for line in open(filename):
 if line.startswith(">"):
-yield line[1:].rstrip("\n").split(None,1)[0]
+yield line[1:].rstrip("\n").split(None, 1)[0]
 elif filetype.startswith("fastq"):
-#Use the Galaxy library not Biopython to cope with CS
+# Use the Galaxy library not Biopython to cope with CS
 from galaxy_utils.sequence.fastq import fastqReader
 handle = open(filename, "rU")
 for record in fastqReader(handle):
-#The [1:] is because the fastaReader leaves the @ on the identifer.
+# The [1:] is because the fastaReader leaves the @ on the identifer.
 yield record.identifier.split()[0][1:]
 handle.close()
-elif filetype=="sff":
+elif filetype == "sff":
 try:
 from Bio.SeqIO import index
 except ImportError:
-sys_exit("Require Biopython 1.54 or later (to read SFF files)")
+sys.exit("Require Biopython 1.54 or later (to read SFF files)")
-#This will read the SFF index block if present (very fast)
+# This will read the SFF index block if present (very fast)
 for name in index(filename, "sff"):
 yield name
 else:
-sys_exit("Unexpected file type %s" % filetype)
+sys.exit("Unexpected file type %s" % filetype)
 def load_ids_whitelist(filename, filetype, whitelist):
 for name in load_ids(filename, filetype):
 if name in whitelist:
 yield name
 else:
-sys_exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
+sys.exit("Unexpected ID %s in %s file %s" % (name, filetype, filename))
 if all_file in ["", "-", '""', '"-"']:
-#Load without white list
+# Load without white list
-sets = [set(load_ids(f,t)) for (f,t,c) in set_data]
+sets = [set(load_ids(f, t)) for (f, t, c) in set_data]
-#Take union
+# Take union
-all = set()
+all_ids = set()
 for s in sets:
-all.update(s)
+all_ids.update(s)
-print "Inferred total of %i IDs" % len(all)
+print "Inferred total of %i IDs" % len(all_ids)
 else:
-all = set(load_ids(all_file, all_type))
+all_ids = set(load_ids(all_file, all_type))
-print "Total of %i IDs" % len(all)
+print "Total of %i IDs" % len(all_ids)
-sets = [set(load_ids_whitelist(f,t,all)) for (f,t,c) in set_data]
+sets = [set(load_ids_whitelist(f, t, all_ids)) for (f, t, c) in set_data]
-for s, (f,t,c) in zip(sets, set_data):
+for s, (f, t, c) in zip(sets, set_data):
 print "%i in %s" % (len(s), c)
-#Now call R library to draw simple Venn diagram
+# Now call R library to draw simple Venn diagram
 try:
-#Create dummy Venn diagram counts object for three groups
+# Create dummy Venn diagram counts object for three groups
-cols = 'c("%s")' % '","'.join("Set%i" % (i+1) for i in range(n))
+cols = 'c("%s")' % '","'.join("Set%i" % (i + 1) for i in range(n))
-rpy.r('groups <- cbind(%s)' % ','.join(['1']*n))
+rpy.r('groups <- cbind(%s)' % ','.join(['1'] * n))
 rpy.r('colnames(groups) <- %s' % cols)
 rpy.r('vc <- vennCounts(groups)')
-#Populate the 2^n classes with real counts
+# Populate the 2^n classes with real counts
-#Don't make any assumptions about the class order
+# Don't make any assumptions about the class order
-#print rpy.r('vc')
+# print rpy.r('vc')
 for index, row in enumerate(rpy.r('vc[,%s]' % cols)):
 if isinstance(row, int) or isinstance(row, float):
-#Hack for rpy being too clever for single element row
+# Hack for rpy being too clever for single element row
 row = [row]
-names = all
+names = all_ids
 for wanted, s in zip(row, sets):
 if wanted:
 names = names.intersection(s)
 else:
 names = names.difference(s)
-rpy.r('vc[%i,"Counts"] <- %i' % (index+1, len(names)))
+rpy.r('vc[%i,"Counts"] <- %i' % (index + 1, len(names)))
-#print rpy.r('vc')
+# print rpy.r('vc')
 if n == 1:
-#Single circle, don't need to add (Total XXX) line
+# Single circle, don't need to add (Total XXX) line
-names = [c for (t,f,c) in set_data]
+names = [c for (t, f, c) in set_data]
 else:
-names = ["%s\n(Total %i)" % (c, len(s)) for s, (f,t,c) in zip(sets, set_data)]
+names = ["%s\n(Total %i)" % (c, len(s)) for s, (f, t, c) in zip(sets, set_data)]
 rpy.r.assign("names", names)
-rpy.r.assign("colors", ["red","green","blue"][:n])
+rpy.r.assign("colors", ["red", "green", "blue"][:n])
 rpy.r.pdf(pdf_file, 8, 8)
 rpy.r("""vennDiagram(vc, include="both", names=names,
 main="%s", sub="(Total %i)",
 circle.col=colors)
-""" % (all_label, len(all)))
+""" % (all_label, len(all_ids)))
 rpy.r.dev_off()
 except Exception, exc:
-sys_exit( "%s" %str( exc ) )
+sys.exit("%s" % str(exc))
-rpy.r.quit( save="no" )
+rpy.r.quit(save="no")
 print "Done"

Mercurial > repos > peterjc > venn_list

comparison tools/venn_list/venn_list.py @ 6:ea68a1a4c1d9 draft