Mercurial > repos > matces > carpet_toolsuite
diff carpet-src-1/tools/CARPET/TSS_distance.py @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
line wrap: on
line diff
#!/usr/bin/env python

# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.

"""Draw a histogram of one numeric column of a tab-separated file with R.

Command line (all positional):

    TSS_distance.py IN_FILE OUT_PDF COLUMN TITLE CUTOFF BREAKS DENSITY

IN_FILE   tab-separated input; empty lines and lines starting with '#'
          are counted as skipped.
OUT_PDF   path handed to R's pdf() device.
COLUMN    1-based index of the column to plot.
TITLE     main title of the plot.
CUTOFF    numeric threshold; larger values are replaced by CUTOFF + 2000.
BREAKS    number of histogram breaks, 0 selects R's "Sturges" rule.
DENSITY   the literal string "true" overlays a density curve.
"""

import sys

# Values above the cut-off are replaced by cut-off + this offset
# (presumably so that all far-away sites end up in one overflow bar).
_OVERFLOW_OFFSET = 2000


def stop_err(msg):
    """Write *msg* to stderr and terminate with a non-zero exit status."""
    sys.stderr.write(msg)
    sys.exit(1)


def _read_column(in_fname, column, cutoff):
    """Collect the numeric values of one column of a tab-separated file.

    Parameters:
        in_fname: path of the file to read.
        column:   0-based index of the field to extract.
        cutoff:   float; values greater than it become cutoff + 2000.

    Returns a tuple
        (matrix, total_lines, skipped_lines, first_invalid_line, invalid_value)
    where matrix is a list of one-element rows ([value]), first_invalid_line
    is the 1-based number of the first skipped line (0 if none) and
    invalid_value is the offending field of that line ('' when the line was
    empty, a comment, or too short to contain the column).
    """
    matrix = []
    total_lines = 0
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''

    with open(in_fname) as handle:
        for line_no, line in enumerate(handle, 1):
            total_lines = line_no
            line = line.rstrip('\r\n')

            raw = None      # text of the requested field, if it exists
            value = None    # parsed number, None marks the line as invalid
            if line and not line.startswith('#'):
                try:
                    raw = line.split("\t")[column]
                    # "NA" is not accepted by float(); the original code
                    # attempted to store NaN for it but then failed on the
                    # conversion anyway, so such lines have always been
                    # skipped and that effective behaviour is kept.
                    value = float(raw)
                except (IndexError, ValueError):
                    value = None

            if value is None:
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = line_no
                    if raw is not None:
                        invalid_value = raw
                continue

            if value > cutoff:
                value = cutoff + _OVERFLOW_OFFSET
            # Exactly one value per row (the original appended it twice).
            matrix.append([value])

    return (matrix, total_lines, skipped_lines,
            first_invalid_line, invalid_value)


def main():
    """Parse sys.argv, read the column and write the histogram PDF."""
    # Explicit names instead of a module-level wildcard import; keeping the
    # import local lets the rest of this module be imported without rpy.
    from rpy import array, r

    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        stop_err("..Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    # Parse the cut-off once; previously a bad value was re-parsed on every
    # line and silently turned every line into an "invalid" one.
    try:
        cutoff = float(sys.argv[5])
    except ValueError:
        stop_err("..Distance cut-off '%s' is not numeric." % sys.argv[5])
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"

    (matrix, total_lines, skipped_lines,
     first_invalid_line, invalid_value) = _read_column(in_fname, column, cutoff)

    # Compare against the real line count: the original used the last
    # 0-based index, which mis-reported files with a single valid line and
    # crashed on empty files.
    if skipped_lines < total_lines:
        print("..on columnn %s" % sys.argv[3])
        try:
            a = array(matrix)
            r.pdf(out_fname, 8, 8)
            r.hist(a, probability=True, main=title, xlab="TSS distance", breaks=breaks)
            if density:
                r.lines(r.density(a))
            r.dev_off()
        except Exception as exc:
            stop_err("Building histogram resulted in error: %s." % str(exc))
    else:
        print("..all values in column %s are non-numeric." % sys.argv[3])

    if skipped_lines > 0:
        print("..skipped %d invalid lines starting with line #%d. Value '%s' is not numeric." % (skipped_lines, first_invalid_line, invalid_value))

    r.quit(save="no")


if __name__ == "__main__":
    main()