Mercurial > repos > devteam > weightedaverage
diff WeightedAverage.py @ 2:efa2b391e887 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-devteam/tree/master/tools/weightedaverage commit f770c3c58f1e7e1fa5ed22d7f7aca856d36729e8
author | devteam |
---|---|
date | Wed, 05 Oct 2016 13:39:38 -0400 |
parents | 90611e86a998 |
children |
line wrap: on
line diff
--- a/WeightedAverage.py Thu Apr 03 09:34:41 2014 -0400
+++ b/WeightedAverage.py Wed Oct 05 13:39:38 2016 -0400
@@ -3,6 +3,7 @@
 usage: %prog bed_file_1 bed_file_2 out_file
     -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
     -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
+    -z, --allow_zeros: Include zeros in calculations
 """
 
 import collections
@@ -42,6 +43,11 @@
 def GetOverlap(a, b):
     return min(a[1], b[1])-max(a[0], b[0])
 
+def get_float_no_zero( field ):
+    rval = float( field )
+    assert rval
+    return rval
+
 
 options, args = doc_optparse.parse( __doc__ )
 
@@ -53,38 +59,37 @@
     print eee
     stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )
 
-fd2 = open(input2)
-lines2 = fd2.readlines()
+if options.allow_zeros:
+    get_value = float
+else:
+    get_value = get_float_no_zero
 RecombChrDict = collections.defaultdict(list)
 
 skipped = 0
-for line in lines2:
+for line in open( input2 ):
     temp = line.strip().split()
     try:
-        assert float(temp[int(name_col_2)])
-    except:
+        value = get_value( temp[ name_col_2 ] )
+    except Exception:
         skipped += 1
         continue
-    tempIndex = [int(temp[int(start_col_2)]), int(temp[int(end_col_2)]), float(temp[int(name_col_2)])]
-    RecombChrDict[temp[int(chr_col_2)]].append(tempIndex)
+    tempIndex = [ int( temp[ start_col_2 ] ), int( temp[ end_col_2 ] ), value ]
+    RecombChrDict[ temp[ chr_col_2 ] ].append( tempIndex )
 
 print "Skipped %d features with invalid values" % (skipped)
 
-fd1 = open(input1)
-lines = fd1.readlines()
-finalProduct = ''
-for line in lines:
-    temp = line.strip().split('\t')
-    chromosome = temp[int(chr_col_1)]
-    start = int(temp[int(start_col_1)])
-    stop = int(temp[int(end_col_1)])
+fdd = open( input3, 'w' )
+for line in open( input1 ):
+    line = line.strip()
+    temp = line.split('\t')
+    chromosome = temp[ chr_col_1 ]
+    start = int( temp[ start_col_1 ] )
+    stop = int( temp[ end_col_1 ] )
     start_stop = [start, stop]
     RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
     try:
         RecombRate = "%.4f" % (float(RecombRate))
     except:
         RecombRate = RecombRate
-    finalProduct += line.strip()+'\t'+str(RecombRate)+'\n'
-fdd = open(input3, 'w')
-fdd.writelines(finalProduct)
+    fdd.write( "%s\t%s\n" % ( line, RecombRate ) )
 fdd.close()