Mercurial > repos > matces > carpet_toolsuite
view carpet-src-1/tools/CARPET/TSS_distance.py @ 0:cdd489d98766
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | matces |
---|---|
date | Tue, 07 Jun 2011 16:50:41 -0400 |
parents | |
children |
line wrap: on
line source
#!/usr/bin/env python
# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
"""Plot a histogram of one numeric column of a tab-separated file as a PDF.

Usage:
    TSS_distance.py IN_FILE OUT_PDF COLUMN TITLE MAX_DISTANCE BREAKS DENSITY

    IN_FILE       tab-separated input; blank lines and lines starting with
                  '#' are counted as skipped lines.
    OUT_PDF       path of the PDF written through R.
    COLUMN        1-based index of the column to plot.
    TITLE         main title of the histogram.
    MAX_DISTANCE  numeric threshold; any value above it is replaced by
                  MAX_DISTANCE + 2000 before plotting.
    BREAKS        number of histogram breaks; 0 selects R's "Sturges" rule.
    DENSITY       the literal string "true" overlays a density curve.
"""

import sys
from rpy import *

# Amount added to the threshold to form the replacement for every value that
# exceeds it, so that all such values share a single position on the x axis.
_OVERFLOW_OFFSET = 2000


def stop_err(msg):
    """Write *msg* to stderr and terminate the script.

    NOTE(review): sys.exit() without an argument exits with status 0, so the
    failure is only visible through the stderr output -- confirm that the
    caller relies on stderr rather than on the exit status.
    """
    sys.stderr.write(msg)
    sys.exit()


def _read_distances(lines, column, max_distance):
    """Extract one float per usable line from an iterable of text lines.

    Args:
        lines: iterable of raw lines (line endings are stripped here).
        column: 0-based index of the tab-separated field to read.
        max_distance: values greater than this are replaced by
            ``max_distance + _OVERFLOW_OFFSET``.

    Returns:
        A tuple ``(rows, skipped_lines, first_invalid_line, invalid_value,
        last_index)`` where ``rows`` is a list of one-element lists,
        ``first_invalid_line`` is 1-based (0 if nothing was skipped),
        ``invalid_value`` is the unparsable text found on that first skipped
        line ('' when the line was blank, a comment, or too short), and
        ``last_index`` is the 0-based index of the last line read (0 for
        empty input).
    """
    rows = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''
    last_index = 0  # stays 0 when there is nothing to iterate over
    for last_index, line in enumerate(lines):
        line = line.rstrip('\r\n')
        value = None
        rejected = ''
        # Blank lines and '#' comments are counted as skipped lines too.
        if line and not line.startswith('#'):
            try:
                rejected = line.split("\t")[column]
                value = float(rejected)
            except (IndexError, ValueError):
                # Missing column or non-numeric text (including "NA"):
                # value stays None and the line is counted below.
                pass
        if value is None:
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = last_index + 1
                invalid_value = rejected
            continue
        if value > max_distance:
            value = max_distance + _OVERFLOW_OFFSET
        # Exactly one value per line; each row is a one-element list.
        rows.append([value])
    return rows, skipped_lines, first_invalid_line, invalid_value, last_index


def main():
    """Parse the command line, read the column and draw the histogram."""
    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2]
    try:
        column = int(sys.argv[3]) - 1
    except (IndexError, ValueError):
        stop_err("..Column not specified, your query does not contain a column of numerical data.")
    title = sys.argv[4]
    xlab = sys.argv[5]
    # Despite its name, this argument is used as the numeric threshold; the
    # x-axis label itself is fixed below. Convert it once, up front, so a bad
    # value is reported instead of silently rejecting every input line.
    try:
        max_distance = float(xlab)
    except ValueError:
        stop_err("..Maximum distance '%s' is not numeric." % xlab)
    breaks = int(sys.argv[6])
    if breaks == 0:
        breaks = "Sturges"
    density = sys.argv[7] == "true"

    with open(in_fname) as in_file:
        (matrix, skipped_lines, first_invalid_line,
         invalid_value, last_index) = _read_distances(in_file, column, max_distance)

    # Same threshold as before: plot only if fewer lines were skipped than
    # the index of the last line read.
    if skipped_lines < last_index:
        print("..on columnn %s" % sys.argv[3])
        try:
            # `array` and `r` are expected to be provided by `from rpy import *`.
            a = array(matrix)
            r.pdf(out_fname, 8, 8)
            r.hist(a, probability=True, main=title, xlab="TSS distance", breaks=breaks)
            if density:
                r.lines(r.density(a))
            r.dev_off()
        except Exception as exc:
            stop_err("Building histogram resulted in error: %s." % str(exc))
    else:
        print("..all values in column %s are non-numeric." % sys.argv[3])

    if skipped_lines > 0:
        print("..skipped %d invalid lines starting with line #%d. Value '%s' is not numeric." % (skipped_lines, first_invalid_line, invalid_value))

    r.quit(save="no")


if __name__ == "__main__":
    main()