diff WeightedAverage.py @ 2:efa2b391e887 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/weightedaverage commit f770c3c58f1e7e1fa5ed22d7f7aca856d36729e8
author devteam
date Wed, 05 Oct 2016 13:39:38 -0400
parents 90611e86a998
children
line wrap: on
line diff
--- a/WeightedAverage.py	Thu Apr 03 09:34:41 2014 -0400
+++ b/WeightedAverage.py	Wed Oct 05 13:39:38 2016 -0400
@@ -3,6 +3,7 @@
 usage: %prog bed_file_1 bed_file_2 out_file
     -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
     -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
+    -z, --allow_zeros: Include zeros in calculations
 """
 
 import collections
@@ -42,6 +43,11 @@
 def GetOverlap(a, b):
     return min(a[1], b[1])-max(a[0], b[0])
 
+def get_float_no_zero( field ):
+    rval = float( field )
+    assert rval
+    return rval
+
 
 options, args = doc_optparse.parse( __doc__ )
 
@@ -53,38 +59,37 @@
     print eee
     stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
 
-fd2 = open(input2)
-lines2 = fd2.readlines()
+if options.allow_zeros:
+    get_value = float
+else:
+    get_value = get_float_no_zero
 RecombChrDict = collections.defaultdict(list)
 
 skipped = 0
-for line in lines2:
+for line in open( input2 ):
     temp = line.strip().split()
     try:
-        assert float(temp[int(name_col_2)])
-    except:
+        value = get_value( temp[ name_col_2 ] )
+    except Exception:
         skipped += 1
         continue
-    tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])]
-    RecombChrDict[temp[int(chr_col_2)]].append(tempIndex)
+    tempIndex = [ int( temp[ start_col_2 ] ), int( temp[ end_col_2 ] ), value ]
+    RecombChrDict[ temp[ chr_col_2 ] ].append( tempIndex )
 
 print "Skipped %d features with invalid values" % (skipped)
 
-fd1 = open(input1)
-lines = fd1.readlines()
-finalProduct = ''
-for line in lines:
-    temp = line.strip().split('\t')
-    chromosome = temp[int(chr_col_1)]
-    start = int(temp[int(start_col_1)])
-    stop = int(temp[int(end_col_1)])
+fdd = open( input3, 'w' )
+for line in open( input1 ):
+    line = line.strip()
+    temp = line.split('\t')
+    chromosome = temp[ chr_col_1 ]
+    start = int( temp[ start_col_1 ] )
+    stop = int( temp[ end_col_1 ] )
     start_stop = [start, stop]
     RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
     try:
         RecombRate = "%.4f" % (float(RecombRate))
     except:
         RecombRate = RecombRate
-    finalProduct += line.strip()+'\t'+str(RecombRate)+'\n'
-fdd = open(input3, 'w')
-fdd.writelines(finalProduct)
+    fdd.write( "%s\t%s\n" % ( line, RecombRate ) )
 fdd.close()