annotate commons/tools/srptTableOverlap.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import os
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 import logging
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 import string
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8 import ConfigParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
9
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10 from pyRepet.sql.TableAdaptator import *
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
11 import pyRepet.sql.RepetDBMySQL
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
12 import pyRepet.coord.Map
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
13 import pyRepet.coord.Path
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
14 import pyRepet.coord.Set
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
15
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
16
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def help():
    """
    Print the command-line usage of this program on the standard output.

    The program name is the last '/'-separated component of sys.argv[0].
    Every option accepted by the getopt string in main() is listed,
    including -v (verbosity).
    """
    progName = sys.argv[0].split("/")[-1]
    lines = [
        "",
        "usage: %s [ options ]" % ( progName ),
        "options:",
        " -h: this help",
        " -q: query table",
        " -s: subject table",
        " -p: by path",
        " -t: table type comparison: qtype/stype where qtype=[map,set,path] and stype=[path,set,map]",
        " -c: configuration file from TEdenovo or TEannot pipeline",
        " -H: MySQL host (if no configuration file)",
        " -U: MySQL user (if no configuration file)",
        " -P: MySQL password (if no configuration file)",
        " -D: MySQL database (if no configuration file)",
        " -v: verbosity level (default=0)",
        "",
    ]
    # A single write gives the same output as one print statement per line.
    sys.stdout.write( "\n".join( lines ) + "\n" )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
32
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
33
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def pathOverlapByPath( qtable, qtype, stable, stype, db, fout, verbose=0 ):
    """
    For each path (or set) identifier of the query table, report its overlap with the subject table.

    A human-readable report is printed on stdout and one tab-separated line per
    identifier is written to fout with the columns:
    identifier, query name, query size, subject names ("id:name" joined by "|"),
    number of subjects, subject size, overlap size, overlap/query size,
    overlap/subject size.
    When nothing is retrieved from the subject table, the subject columns are
    "-", 0, 0, 0, 0.0 and 0.0.

    qtable:  name of the query table
    qtype:   format of the query table, "path" or "set"
    stable:  name of the subject table
    stype:   format of the subject table, "path" or "set"
    db:      database handler handed to the table adaptators
    fout:    object with a write() method receiving the tabulated report
    verbose: messages are echoed on stdout when > 0

    Logs an error and exits with status 1 if qtype or stype is not supported.
    """
    # Select the query accessors once: they do not change inside the loop.
    if qtype == "path":
        db.create_path_index( qtable )
        qtableAdaptator = TablePathAdaptator( db, qtable )
        path_num_list = qtableAdaptator.getPath_num()
        getQueryList = qtableAdaptator.getPathList_from_num
    elif qtype == "set":
        db.create_set_index( qtable )
        qtableAdaptator = TableSetAdaptator( db, qtable )
        path_num_list = qtableAdaptator.getSet_num()
        getQueryList = qtableAdaptator.getSetList_from_num
    else:
        msg = "unknown query table type: %s" % ( qtype )
        if verbose > 0:
            print( msg )
        logging.error( msg )
        sys.exit(1)
    msg = "nb of paths in query table: %i" % ( len(path_num_list) )
    if verbose > 0:
        print( msg )
    logging.info( msg )

    if stype == "path":
        getSubjectList = TableBinPathAdaptator( db, stable ).getPathList_from_qcoord
    elif stype == "set":
        getSubjectList = TableBinSetAdaptator( db, stable ).getSetList_from_qcoord
    else:
        msg = "unknown subject table type: %s" % ( stype )
        if verbose > 0:
            print( msg )
        logging.error( msg )
        sys.exit(1)

    for path_num in path_num_list:
        qlist = getQueryList( path_num )
        if qtype == "path":
            # paths are converted to sets so both formats share the code below
            qlist = pyRepet.coord.Path.path_list_rangeQ2Set( qlist )
        qlist.sort()
        qmin, qmax = pyRepet.coord.Set.set_list_boundaries( qlist )

        # The subject table is searched on the query boundaries widened by
        # one position on each side.
        seqName = qlist[0].seqname.split()[0]
        slist = getSubjectList( seqName, qmin - 1, qmax + 1 )
        if stype == "path":
            slist = pyRepet.coord.Path.path_list_rangeQ2Set( slist )

        # The query part of the report is the same with or without a match.
        print( "----------------------------------------" )
        print( "query:" )
        pyRepet.coord.Set.set_list_show( qlist )
        qlist = pyRepet.coord.Set.set_list_merge( qlist )
        qsize = pyRepet.coord.Set.set_list_size( qlist )
        print( "query size= %s" % ( qsize, ) )

        if len(slist) == 0:
            print( "No match!" )
            fout.write( "%d\t%s\t%d\t-\t0\t0\t0\t0.0\t0.0\n" \
                        % ( path_num, qlist[0].name, qsize ) )
            continue

        # Subject names are collected before the merge below rebinds slist.
        slist_dict = pyRepet.coord.Set.set_list_split( slist )
        subj_names = "|".join( [ "%d:%s" % ( i, l[0].name ) \
                                 for i, l in slist_dict.items() ] )
        subj_count = len( slist_dict )

        print( "subject:" )
        pyRepet.coord.Set.set_list_show( slist )
        slist = pyRepet.coord.Set.set_list_merge( slist )
        ssize = pyRepet.coord.Set.set_list_size( slist )
        osize = pyRepet.coord.Set.set_list_overlap_size( qlist, slist )

        print( "subject size= %s" % ( ssize, ) )
        print( "overlap size= %s" % ( osize, ) )

        fout.write( "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%f\t%f\n" \
                    % ( path_num, qlist[0].name, \
                        qsize, \
                        subj_names, \
                        subj_count, \
                        ssize, \
                        osize, \
                        float(osize)/qsize, \
                        float(osize)/ssize ) )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
131
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
132
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _getTableAdaptator( db, table, tableType, role, verbose ):
    """
    Return the adaptator of 'table' for a tableType among "map", "path" and "set".

    For any other type an error is logged (and printed when verbose > 0) and
    the program exits with status 1; 'role' ("query" or "subject") is only
    used in that message.
    """
    classNames = { "map": "TableMapAdaptator",
                   "path": "TablePathAdaptator",
                   "set": "TableSetAdaptator" }
    if tableType not in classNames:
        msg = "unknown %s table type: %s" % ( role, tableType )
        if verbose > 0:
            print( msg )
        logging.error( msg )
        sys.exit(1)
    adaptatorClass = getattr( pyRepet.sql.TableAdaptator, classNames[tableType] )
    return adaptatorClass( db, table )


def getOverlapAllPaths( qtable, qtype, stable, stype, db, verbose=0 ):
    """
    For each query in qtable, compute the overlap between its matches and the matches in stable.

    The sets of both tables are merged contig by contig (contig names come
    from the query table) before sizes are computed. A summary is logged, and
    printed when verbose > 0.

    qtable:  name of the query table
    qtype:   format of the query table: "map", "path" or "set"
    stable:  name of the subject table
    stype:   format of the subject table: "map", "path" or "set"
    db:      database handler handed to the table adaptators
    verbose: messages are echoed on stdout when > 0

    Returns the tuple (sum_osize, sum_non_osize, sum_qsize): cumulative size
    of the overlaps, of the non-overlapping query part, and of the query.
    Exits with status 1 if qtype or stype is not supported.
    """
    qtableAdaptator = _getTableAdaptator( db, qtable, qtype, "query", verbose )

    msg = "fetching query table data..."
    contigs = qtableAdaptator.getContig_name()
    msg += " %i contig(s)" % ( len(contigs) )
    if verbose > 0:
        print( msg ); sys.stdout.flush()
    logging.info( msg )

    stableAdaptator = _getTableAdaptator( db, stable, stype, "subject", verbose )

    msg = "looking for overlaps with subject data..."
    if verbose > 0:
        print( msg ); sys.stdout.flush()
    logging.info( msg )
    sum_qsize = 0
    sum_osize = 0
    sum_non_osize = 0
    for c in contigs:
        qlist = pyRepet.coord.Set.set_list_merge( qtableAdaptator.getSetList_from_contig( c ) )
        slist = pyRepet.coord.Set.set_list_merge( stableAdaptator.getSetList_from_contig( c ) )
        qsize = pyRepet.coord.Set.set_list_size( qlist )
        osize = pyRepet.coord.Set.set_list_overlap_size( qlist, slist )
        sum_osize += osize
        sum_qsize += qsize
        sum_non_osize += qsize - osize
        msg = "contig '%s': qsize=%d osize=%d" % ( c, qsize, osize )
        logging.debug( msg )
        if verbose > 0:
            print( msg ); sys.stdout.flush()

    # With no contig (or only empty ones) the query size is 0: report null
    # proportions instead of failing on a division by zero.
    if sum_qsize > 0:
        prop_osize = float(sum_osize) / sum_qsize
        prop_non_osize = float(sum_non_osize) / sum_qsize
    else:
        prop_osize = 0.0
        prop_non_osize = 0.0

    msg = "summary:"
    msg += "\ncumulative coverage of the query table: %i nt" % ( sum_qsize )
    msg += "\nsize of overlaps with the subject table: %i nt" % ( sum_osize )
    msg += "\n proportion of query: %.3f" % ( prop_osize )
    msg += "\nsize of non-overlaps with the subject table: %i nt" % ( sum_non_osize )
    msg += "\n proportion of query: %.3f" % ( prop_non_osize )
    if verbose > 0:
        print( msg ); sys.stdout.flush()
    logging.info( msg )

    return sum_osize, sum_non_osize, sum_qsize
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
204
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
205
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _getTableOrLoadFile( db, table, tableType ):
    """
    Return the name of the table to use for 'table'.

    If 'table' already exists in the database its name is returned unchanged.
    Otherwise it must be the path of an existing file: db.create_table() is
    called with a table name derived from it (dots replaced by underscores)
    and that name is returned. Exits with status 1 if it is neither a table
    nor a file.
    """
    if db.exist( table ):
        return table
    if not os.path.exists( table ):
        msg = "ERROR: neither table nor file '%s'" % ( table )
        sys.stderr.write( "%s\n" % msg )
        sys.exit(1)
    tableName = table.replace( ".", "_" )
    # NOTE(review): db is also passed as first argument, as in the original
    # call -- confirm against the signature of RepetDB.create_table().
    db.create_table( db, tableName, table, tableType )
    return tableName


def main ():
    """
    This program computes the overlaps between two tables recording spatial coordinates.

    Options are read from sys.argv (see help()). The MySQL connection
    parameters come from section 'repet_env' of the configuration file (-c)
    when one is given, from -H/-U/-P/-D otherwise. Messages are logged in
    '<query>-<subject>-<pid>.log'. With -p the overlaps are computed
    identifier by identifier and saved in '<query>_vs_<subject>.dat',
    otherwise they are computed contig by contig.
    Exits with status 1 on invalid or missing options.
    """
    qtable = ""
    stable = ""
    tableTypes = ""
    by_path = False
    configFileName = ""
    host = ""
    user = ""
    passwd = ""
    dbname = ""
    verbose = 0
    try:
        opts, args = getopt.getopt( sys.argv[1:], "hq:s:t:pc:H:U:P:D:v:" )
    except getopt.GetoptError:
        help()
        sys.exit(1)
    if len(args) != 0:
        help()
        sys.exit(1)
    for o, a in opts:
        if o == "-h":
            help()
            sys.exit(0)
        elif o == "-q":
            qtable = a
        elif o == "-s":
            stable = a
        elif o == "-t":
            tableTypes = a
        elif o == "-p":
            by_path = True
        elif o == "-c":
            configFileName = a
        elif o == "-H":
            host = a
        elif o == "-U":
            user = a
        elif o == "-P":
            passwd = a
        elif o == "-D":
            # kept in 'dbname', the variable used to open the connection
            dbname = a
        elif o == "-v":
            verbose = int(a)

    missingDbAccess = configFileName == "" and "" in ( host, user, passwd, dbname )
    if qtable == "" or stable == "" or missingDbAccess:
        print( "ERROR: missing compulsory options" )
        help()
        sys.exit(1)
    # Checked here rather than failing later when unpacking the split.
    if tableTypes.count( "/" ) != 1:
        print( "ERROR: option -t should be 'qtype/stype'" )
        help()
        sys.exit(1)
    qtype, stype = tableTypes.split( "/" )

    if verbose > 0:
        print( "START %s" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()

    if configFileName != "":
        config = ConfigParser.ConfigParser()
        configFile = open( configFileName )
        try:
            config.readfp( configFile )
        finally:
            configFile.close()
        host = config.get( "repet_env", "repet_host" )
        user = config.get( "repet_env", "repet_user" )
        passwd = config.get( "repet_env", "repet_pw" )
        dbname = config.get( "repet_env", "repet_db" )

    logfilename = qtable + "-" + stable + "-" + str(os.getpid()) + ".log"
    handler = logging.FileHandler( logfilename )
    formatter = logging.Formatter( "%(asctime)s %(levelname)s: %(message)s" )
    handler.setFormatter( formatter )
    rootLogger = logging.getLogger( '' )
    rootLogger.addHandler( handler )
    rootLogger.setLevel( logging.DEBUG )
    logging.info( "started" )

    db = pyRepet.sql.RepetDBMySQL.RepetDB( user, host, passwd, dbname )

    # Each table is loaded with its own format (the subject one with stype).
    qtable = _getTableOrLoadFile( db, qtable, qtype )
    stable = _getTableOrLoadFile( db, stable, stype )

    msg = "input tables:"
    msg += "\nquery table: %s ('%s' format)" % ( qtable, qtype )
    msg += "\nsubject table: %s ('%s' format)" % ( stable, stype )
    logging.info( msg )

    if by_path:
        fout = open( qtable + "_vs_" + stable + ".dat", "w" )
        try:
            pathOverlapByPath( qtable, qtype, stable, stype, db, fout, verbose )
        finally:
            fout.close()
    else:
        getOverlapAllPaths( qtable, qtype, stable, stype, db, verbose )

    logging.info( "finished" )

    if verbose > 0:
        print( "END %s" % ( sys.argv[0].split("/")[-1] ) )
        sys.stdout.flush()
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
316
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
317
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Run the command-line program only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()