view carpet-src-1/tools/CARPET/TSS_distance.py @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
line wrap: on
line source

#!/usr/bin/env python

# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
#
# This program is free software; ; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.


import sys
from rpy import *


def stop_err(msg):
    sys.stderr.write(msg)
    sys.exit()

def main():

    # Handle input params
    in_fname = sys.argv[1]
    out_fname = sys.argv[2] 
    try:
        column = int( sys.argv[3] ) - 1
    except:
        stop_err( "..Column not specified, your query does not contain a column of numerical data." )
    title = sys.argv[4]
    xlab = sys.argv[5]
    breaks = int( sys.argv[6] )
    if breaks == 0: breaks = "Sturges"
    if sys.argv[7] == "true": density = True
    else: density = False
	
	
	
    matrix = []
    skipped_lines = 0
    first_invalid_line = 0
    invalid_value = ''

    for i, line in enumerate( file( in_fname ) ):
        valid = True
        line = line.rstrip('\r\n')
        # Skip comments
        if line and not line.startswith( '#' ): 
            # Extract values and convert to floats
            row = []
            try:
                fields = line.split( "\t" )
                val = fields[column]
                if val.lower() == "na":
                    row.append( float( "nan" ) )
                if float(val) > float(xlab):
               		val = (float(xlab)+2000)
               	
               	row.append( float( val ) ) 
            except:
                valid = False
                skipped_lines += 1
                if not first_invalid_line:
                    first_invalid_line = i+1
            else:
                try:
                    row.append( float( val ) )
                except ValueError:
                    valid = False
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i+1
                        invalid_value = fields[column]
        else:
            valid = False
            skipped_lines += 1
            if not first_invalid_line:
                first_invalid_line = i+1

        if valid:
            matrix.append( row )

    if skipped_lines < i:
        print "..on columnn %s" %sys.argv[3]
        try:
            a = array( matrix )
            r.pdf( out_fname, 8, 8 )
            r.hist( a, probability=True, main=title, xlab="TSS distance", breaks=breaks )
            if density:
                r.lines( r.density( a ) )
            r.dev_off()
        except exc:
          		stop_err("Building histogram resulted in error: %s." %str( exc ))
    else:
       print "..all values in column %s are non-numeric." %sys.argv[3]

    if skipped_lines > 0:
        print "..skipped %d invalid lines starting with line #%d.  Value '%s' is not numeric." % ( skipped_lines, first_invalid_line, invalid_value )

    r.quit( save="no" )
    
if __name__ == "__main__":
   main()