annotate WeightedAverage.py @ 0:9b7b4009f2c0 draft

Imported from capsule None
author devteam
date Tue, 01 Apr 2014 10:48:52 -0400
parents
children 90611e86a998
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/env python
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
2 """
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
3 usage: %prog bed_file_1 bed_file_2 out_file
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
4 -1, --cols1=N,N,N,N: Columns for chr, start, end, strand in first file
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
5 -2, --cols2=N,N,N,N,N: Columns for chr, start, end, strand, name/value in second file
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
6 """
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
7
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
8 import collections
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
9 import sys
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
10 #import numpy
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
11 from galaxy import eggs
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
12 import pkg_resources
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
13 pkg_resources.require( "bx-python" )
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
14 from galaxy.tools.util.galaxyops import *
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
15 from bx.cookbook import doc_optparse
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
16
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
17
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
18 #export PYTHONPATH=~/galaxy/lib/
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
19 #running command python WeightedAverage.py interval_interpolate.bed value_interpolate.bed interpolate_result.bed
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
20
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes *msg* verbatim to standard error (no newline is appended) and
    then raises ``SystemExit`` with status 1, so that the calling process
    can detect the failure from the exit code as well as from stderr.

    The previous implementation called ``sys.exit()`` without an argument,
    which exits with status 0 (success) even though an error was reported.
    """
    sys.stderr.write(msg)
    sys.exit(1)
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
24
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
25
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
def FindRate(chromosome, start_stop, dictType):
    """Return the overlap-weighted average value for one query interval.

    Parameters:
        chromosome: key used to look up features in *dictType*.
        start_stop: two-item sequence ``[start, stop]`` for the query.
        dictType:   mapping of chromosome -> list of
                    ``[start, end, value]`` features.

    Each feature whose overlap with *start_stop* (as computed by
    ``GetOverlap``) is positive contributes ``overlap * value`` to the
    numerator and ``overlap`` to the denominator.

    Returns:
        The weighted average as a float, or the string ``'NA'`` when no
        feature on *chromosome* overlaps the query interval.
    """
    # Start from a float so the final division never truncates, even if
    # every value happens to be an int under Python 2.
    weighted_sum = 0.0
    total_overlap = 0

    # Use .get() instead of indexing: indexing a defaultdict would insert an
    # empty list for every unseen chromosome, and a plain dict would raise
    # KeyError.
    for feature in dictType.get(chromosome, ()):
        overlap = GetOverlap(start_stop, [feature[0], feature[1]])
        if overlap > 0:
            weighted_sum += overlap * feature[2]
            total_overlap += overlap

    # total_overlap is only non-zero when at least one feature overlapped.
    if total_overlap == 0:
        return 'NA'
    return weighted_sum / total_overlap
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
44
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
45
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
def GetOverlap(a, b):
    """Return the signed overlap length of two intervals.

    *a* and *b* are indexable as ``[start, end]``.  The result is
    ``min(ends) - max(starts)``: positive when the intervals overlap, zero
    when they merely touch, and negative (the size of the gap, negated)
    when they are disjoint.  Callers that only want real overlaps should
    test for ``> 0``.
    """
    latest_start = max(a[0], b[0])
    earliest_end = min(a[1], b[1])
    return earliest_end - latest_start
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
48
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
49
9b7b4009f2c0 Imported from capsule None
devteam
parents:
diff changeset
# Command-line handling: options and positional arguments are described by
# the module docstring and parsed by doc_optparse.
options, args = doc_optparse.parse( __doc__ )

try:
    # parse_cols_arg comes from galaxy.tools.util.galaxyops; it presumably
    # turns the "N,N,N,..." option string into column indexes -- TODO confirm.
    chr_col_1, start_col_1, end_col_1, strand_col1 = parse_cols_arg( options.cols1 )
    chr_col_2, start_col_2, end_col_2, strand_col2, name_col_2 = parse_cols_arg( options.cols2 )
    input1, input2, input3 = args
except Exception as eee:
    # Top-level boundary: any failure here means the column metadata or the
    # argument count is wrong, so show the cause and stop.
    print(eee)
    stop_err( "Data issue: click the pencil icon in the history item to correct the metadata attributes." )

# Pass 1: index the value file (second input) by chromosome as
# [start, end, value] triples.
RecombChrDict = collections.defaultdict(list)
skipped = 0
with open(input2) as fd2:
    for line in fd2:
        temp = line.strip().split()
        try:
            # Convert explicitly instead of `assert float(...)`: the assert
            # rejected legitimate values of 0 (falsy) and is stripped
            # entirely when Python runs with -O.
            tempIndex = [
                int(temp[int(start_col_2)]),
                int(temp[int(end_col_2)]),
                float(temp[int(name_col_2)]),
            ]
            chrom = temp[int(chr_col_2)]
        except (ValueError, IndexError, TypeError):
            # Header lines, blank lines, short rows and non-numeric fields
            # are counted and ignored rather than aborting the run.
            skipped += 1
            continue
        RecombChrDict[chrom].append(tempIndex)

print("Skipped %d features with invalid values" % skipped)

# Pass 2: for every interval in the first input, append the weighted
# average of the overlapping values (or 'NA') as a new last column.
out_lines = []
with open(input1) as fd1:
    for line in fd1:
        temp = line.strip().split('\t')
        chromosome = temp[int(chr_col_1)]
        start = int(temp[int(start_col_1)])
        stop = int(temp[int(end_col_1)])
        start_stop = [start, stop]
        RecombRate = FindRate( chromosome, start_stop, RecombChrDict )
        try:
            RecombRate = "%.4f" % (float(RecombRate))
        except (TypeError, ValueError):
            # FindRate returned the 'NA' marker; write it through unchanged.
            pass
        out_lines.append(line.strip() + '\t' + str(RecombRate) + '\n')

# The output is written only after all input has been processed, so a
# failure above does not leave a partial result file behind.
with open(input3, 'w') as fdd:
    fdd.writelines(out_lines)