diff carpet-src-1/tools/CARPET/TSS_distance.py @ 0:cdd489d98766

Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author matces
date Tue, 07 Jun 2011 16:50:41 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/carpet-src-1/tools/CARPET/TSS_distance.py	Tue Jun 07 16:50:41 2011 -0400
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+# Copyright 2009 Matteo Cesaroni, Lucilla Luzi
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+# 
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+
+import sys
+from rpy import *
+
+
+def stop_err(msg):  # report msg on stderr, then end the script via SystemExit
+    sys.stderr.write( msg )
+    raise SystemExit
+
+def main():
+    """Plot a histogram of one column of a tab-separated file as a PDF via R.
+
+    Arguments (sys.argv): in_fname out_fname column title xlab breaks density
+      column  -- 1-based index of the column to plot
+      xlab    -- numeric cutoff; larger values are replaced by cutoff + 2000
+                 (the x axis label itself is always "TSS distance")
+      breaks  -- number of breaks for hist(); 0 selects "Sturges"
+      density -- "true" overlays the r.density() curve
+    Blank lines, '#' comments and rows lacking a numeric value in the column
+    are skipped and summarised on stdout.
+    """
+    # Handle input params
+    in_fname = sys.argv[1]
+    out_fname = sys.argv[2]
+    try:
+        column = int( sys.argv[3] ) - 1
+    except ( IndexError, ValueError ):
+        stop_err( "..Column not specified, your query does not contain a column of numerical data." )
+    title = sys.argv[4]
+    xlab = sys.argv[5]
+    breaks = int( sys.argv[6] )
+    if breaks == 0:
+        breaks = "Sturges"
+    density = sys.argv[7] == "true"
+    try:
+        cutoff = float( xlab )  # parsed once, not once per input line
+    except ValueError:
+        stop_err( "..Distance cutoff '%s' is not numeric." % xlab )
+
+    matrix = []
+    skipped_lines = 0
+    first_invalid_line = 0
+    invalid_value = ''
+
+    in_file = open( in_fname )
+    try:
+        for i, line in enumerate( in_file ):
+            line = line.rstrip( '\r\n' )
+            val = ''
+            num = None
+            # Blank lines and comments are counted as invalid lines.
+            if line and not line.startswith( '#' ):
+                try:
+                    val = line.split( "\t" )[column]
+                    num = float( val )
+                except ( IndexError, ValueError ):
+                    pass  # missing column or non-numeric text such as "NA"
+            if num is None:
+                skipped_lines += 1
+                if not first_invalid_line:
+                    first_invalid_line = i + 1
+                    invalid_value = val
+                continue
+            if num > cutoff:
+                num = cutoff + 2000
+            # Store each value exactly once so R sees the true sample size.
+            matrix.append( [ num ] )
+    finally:
+        in_file.close()
+
+    # Plot whenever at least one value was read; empty input falls through.
+    if matrix:
+        print "..on columnn %s" %sys.argv[3]
+        try:
+            a = array( matrix )
+            r.pdf( out_fname, 8, 8 )
+            r.hist( a, probability=True, main=title, xlab="TSS distance", breaks=breaks )
+            if density:
+                r.lines( r.density( a ) )
+            r.dev_off()
+        except Exception, exc:
+            stop_err( "Building histogram resulted in error: %s." % str( exc ) )
+    else:
+        print "..all values in column %s are non-numeric." %sys.argv[3]
+
+    if skipped_lines > 0:
+        print "..skipped %d invalid lines starting with line #%d.  Value '%s' is not numeric." % ( skipped_lines, first_invalid_line, invalid_value )
+
+    r.quit( save="no" )
+    
+if __name__ == "__main__":  # run main() only when executed as a script
+   main()