comparison carpet-src-1/tools/CARPET/TSS_distance.py @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:cdd489d98766
1 #!/usr/bin/env python
2
3 # Copyright 2009 Matteo Cesaroni, Lucilla Luzi
4 #
5 # This program is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU Lesser General Public License as published by
7 # the Free Software Foundation; either version 3 of the License, or (at your
8 # option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14
15
16 import sys
17 from rpy import *
18
19
def stop_err(msg):
    """Write *msg* to standard error and terminate the script.

    The process exits with status 1 so that callers relying on the exit
    code (rather than on stderr output) also see the run as failed; a bare
    ``sys.exit()`` would report success (status 0).

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write(msg)
    sys.exit(1)
23
# Amount added to the threshold when an over-threshold value is clamped.
# NOTE(review): presumably chosen so that all out-of-range distances pile up
# in one bin beyond the threshold -- confirm with the tool authors.
_OVERFLOW_OFFSET = 2000


def _read_column(in_fname, column, threshold):
    """Collect the numeric values of one column of a tab-separated file.

    A line is counted as skipped when it is blank, starts with '#', has no
    field at index *column*, or holds a value that ``float()`` rejects
    (this includes "NA").  Values greater than *threshold* are replaced by
    ``threshold + _OVERFLOW_OFFSET``.

    Args:
        in_fname: path of the tab-separated input file.
        column: zero-based index of the column to read.
        threshold: numeric cap above which values are clamped.

    Returns:
        A tuple ``(matrix, skipped_lines, first_invalid_line,
        invalid_value)`` where ``matrix`` is a list of one-element rows,
        ``first_invalid_line`` is the 1-based number of the first skipped
        line (0 if none) and ``invalid_value`` is the text found in the
        column on that line ('' when the line had no such field).
    """
    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''

    # The context manager guarantees the input file is closed again.
    with open(in_fname) as handle:
        for line_number, line in enumerate(handle, 1):
            line = line.rstrip('\r\n')
            raw = None
            value = None

            # Blank lines and comments never yield a value.
            if line and not line.startswith('#'):
                fields = line.split("\t")
                try:
                    raw = fields[column]
                    value = float(raw)
                except (IndexError, ValueError):
                    value = None

            if value is None:
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_number
                    # Remember the offending text so the summary can show it.
                    invalid_value = raw or ''
                continue

            if value > threshold:
                value = threshold + _OVERFLOW_OFFSET

            # Exactly one entry per input line; appending the value twice
            # would double the sample size seen by hist() and density().
            matrix.append([value])

    return matrix, skipped_lines, first_invalid_line, invalid_value


def main():
    """Plot a histogram of one numeric column of a tabular file to a PDF.

    Command-line arguments (``sys.argv``):
        1  input file path (tab-separated)
        2  output PDF path
        3  1-based number of the column to plot
        4  plot title
        5  numeric threshold; larger values are clamped (see _read_column).
           It is stored in ``xlab`` but is not used as an axis label: the
           x axis is always labelled "TSS distance".
        6  number of breaks; 0 selects R's "Sturges" rule
        7  "true" to overlay a density curve

    Progress and the number of skipped lines are printed to standard
    output; fatal problems are reported through ``stop_err``.
    """
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        stop_err("..Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"

    # Parse the threshold once; a non-numeric threshold would otherwise make
    # every input line look invalid.
    try:
        threshold = float(xlab)
    except ValueError:
        stop_err("..Distance threshold '%s' is not numeric." % xlab)

    matrix, skipped_lines, first_invalid_line, invalid_value = _read_column(
        in_fname, column, threshold)

    if matrix:
        print("..on columnn %s" % sys.argv[3])
        try:
            a = array(matrix)
            r.pdf(out_fname, 8, 8)
            r.hist(a, probability=True, main=title, xlab="TSS distance", breaks=breaks)
            if density:
                r.lines(r.density(a))
            r.dev_off()
        except Exception as exc:
            stop_err("Building histogram resulted in error: %s." % str(exc))
    else:
        print("..all values in column %s are non-numeric." % sys.argv[3])

    if skipped_lines > 0:
        print("..skipped %d invalid lines starting with line #%d. Value '%s' is not numeric." % (skipped_lines, first_invalid_line, invalid_value))

    r.quit(save="no")
104
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    main()