annotate assembly_stats_txt.py @ 0:44c401ebc424 draft

Uploaded
author aaronpetkau
date Sat, 04 Jul 2015 08:57:35 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
1 #!/usr/bin/env python
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
2 #Version 1.01 - bugs kindly corrected by Jan van Haarst
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
3 import pkg_resources
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
4 import logging, os, string, sys, tempfile, glob, shutil, types, urllib
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
5 import shlex, subprocess
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
6 from optparse import OptionParser, OptionGroup
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
7 from stat import *
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
8
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
9
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
# Module-level logger named after this module.
log = logging.getLogger( __name__ )

# Refuse to run on interpreters older than Python 2.4.
assert ( 2, 4 ) <= sys.version_info[:2]
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
13
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
def stop_err( msg ):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with status 1.

    msg -- text (or any object formattable with ``%s``) describing the error.
    """
    sys.stderr.write( "%s\n" % msg )
    # A bare sys.exit() would exit with status 0, making the failure look
    # like success to any caller that checks the exit code.
    sys.exit( 1 )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
17
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
def __main__():
    """Run fasta_summary.pl and copy its result files to the requested paths.

    Positional command-line arguments (``sys.argv[1:]``):

    1.  html_file         -- accepted for compatibility; not used here
    2.  working_dir       -- directory passed to fasta_summary.pl via ``-o``
                             (created if it does not exist yet)
    3.  type              -- value passed to fasta_summary.pl via ``-t``
    4.  bucket            -- extra option text placed after the ``-t`` value;
                             may be empty or hold several shell-style words
    5.  input             -- path passed to fasta_summary.pl via ``-i``
    6.  stats             -- destination for ``stats.txt``
    7.  sortedcontigs     -- destination for ``sorted_contigs.fa``
    8.  histogrampng      -- destination for ``histogram_bins.dat.png``
    9.  summedcontigspng  -- destination for ``summed_contig_lengths.dat.png``
    10. histogramdata     -- destination for ``histogram_bins.dat``
    11. summedcontigdata  -- destination for ``summed_contig_lengths.dat``

    fasta_summary.pl is looked up in the directory containing this script.
    Failures to create ``working_dir`` or to run fasta_summary.pl (including
    a non-zero exit status) are reported through ``stop_err``.  Errors while
    copying the result files propagate as ordinary exceptions.

    Note: uses ``except ... as`` and ``with``, which are valid on
    Python 2.6+ and Python 3.
    """
    # Parse command line
    if len( sys.argv ) < 12:
        # sys.exit(<str>) prints the text to stderr and exits with status 1.
        sys.exit( 'Usage: assembly_stats_txt.py html_file working_dir type bucket input '
                  'stats sortedcontigs histogrampng summedcontigspng histogramdata summedcontigdata' )
    # sys.argv[1] (html_file) is intentionally not read: nothing below uses it.
    working_dir = sys.argv[2]
    seq_type = sys.argv[3]
    bucket = sys.argv[4]
    input_path = sys.argv[5]

    # ( file expected in working_dir after the run, destination path ) pairs,
    # in the order they are copied.
    outputs = (
        ( 'stats.txt', sys.argv[6] ),
        ( 'sorted_contigs.fa', sys.argv[7] ),
        ( 'histogram_bins.dat.png', sys.argv[8] ),
        ( 'summed_contig_lengths.dat.png', sys.argv[9] ),
        ( 'histogram_bins.dat', sys.argv[10] ),
        ( 'summed_contig_lengths.dat', sys.argv[11] ),
    )

    # An already existing directory is fine; only a failed creation is fatal.
    if not os.path.isdir( working_dir ):
        try: # for test - needs this done
            os.makedirs( working_dir )
        except Exception as e:
            stop_err( 'Error running assembly_stats_txt.py ' + str( e ) )

    # abspath() first: when the script is started by bare file name,
    # dirname(sys.argv[0]) is '' and the helper would resolve to
    # '/fasta_summary.pl'.
    script_dir = os.path.dirname( os.path.abspath( sys.argv[0] ) )
    script = os.path.join( script_dir, 'fasta_summary.pl' )

    try:
        # Argument list instead of a shell string, so paths containing spaces
        # or shell metacharacters reach the script unchanged.  ``bucket`` is
        # split shell-style because it may hold zero or several words.
        cmd = [ script, '-i', input_path, '-t', seq_type ]
        cmd.extend( shlex.split( bucket ) )
        cmd.extend( [ '-o', working_dir ] )
        with open( os.devnull, 'w' ) as devnull:
            proc = subprocess.Popen( args=cmd, stdout=devnull, stderr=subprocess.PIPE )
            # communicate() drains stderr while waiting for the child;
            # wait() followed by read() can deadlock once the pipe fills up.
            stderr = proc.communicate()[1]
        if not isinstance( stderr, str ):
            # Python 3 returns bytes from the pipe.
            stderr = stderr.decode( 'utf-8', 'replace' )
        if proc.returncode != 0:
            raise Exception( stderr )
    except Exception as e:
        stop_err( 'Error running assembly_stats_txt.py ' + str( e ) )

    # Byte-for-byte copies: two of the results are PNG images, which a
    # text-mode line-by-line copy can corrupt.  copyfile() also closes both
    # file handles.
    for produced_name, destination in outputs:
        shutil.copyfile( os.path.join( working_dir, produced_name ), destination )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
95
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
96
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
97
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
98
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
99
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
100
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
101
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
102
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
103
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
104 # rval = ['<html><head><title>Assembly stats Galaxy Composite Dataset </title></head><p/>']
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
105 # rval.append('<div>%s<p/></div>' % (cmdline) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
106 # rval.append('<div>This composite dataset is composed of the following files:<p/><ul>')
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
107 # rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (stats_path,'stats.txt','stats.txt' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
108 # rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (sorted_contigs_path,'sorted_contigs.fa','sorted_contigs.fa' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
109 # rval.append( '<li><a href="%s" type="image/png">%s </a>%s</li>' % (histogram_png_path,'histogram_bins.dat.png','histogram_bins.dat.png' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
110 # rval.append( '<li><a href="%s" type="image/png">%s </a>%s</li>' % (summed_contigs_path,'summed_contig_lengths.dat.png','summed_contig_lengths.dat.png' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
111 # rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (histogram_data_path,'histogram_bins.dat','histogram_bins.dat' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
112 # rval.append( '<li><a href="%s" type="text/plain">%s </a>%s</li>' % (summed_contigs_data_path,'summed_contig_lengths.dat','summed_contig_lengths.dat' ) )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
113
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
114
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
115 #
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
116 # rval.append( '</ul></div></html>' )
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
117 # f = file(html_file,'w')
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
118 # f.write("\n".join( rval ))
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
119 # f.write('\n')
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
120 # f.close()
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
121
44c401ebc424 Uploaded
aaronpetkau
parents:
diff changeset
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    __main__()