Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/FindRep.py @ 36:44d5973c188c
Uploaded
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 15:02:29 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
35:d94018ca4ada | 36:44d5973c188c |
---|---|
1 import re | |
2 from xml.sax.handler import ContentHandler | |
3 | |
class FindRep( ContentHandler ):
    """SAX content handler that writes one tab-separated line per <repeat> element.

    The handler reacts to the elements "window", "sequence-name", "repeat",
    "start", "end", "period", "unit" and "score" (presumably the XML output
    of a tandem-repeat finder -- confirm against the caller). For every
    <repeat> whose score is not greater than ``filter`` it writes::

        count <TAB> (unit)copies <TAB> seqname <TAB> start <TAB> end

    where ``unit`` is the most frequent <unit> text seen inside the repeat
    and ``copies`` is ``(abs(start - end) + 1) // period``.
    """

    # A sequence name of the form "<number> <name> {Cut} <offset>..<end>"
    # carries the offset of the chunk inside its original sequence.
    _cutPattern = re.compile(r"^[0-9]+.+\{Cut\}")
    _leadingNumberPattern = re.compile("^[0-9]+ ")

    def __init__(self,outfileName, filter=0,count=0):
        """Store the configuration; the output file is opened in startDocument().

        outfileName -- path of the file to write.
        filter      -- repeats with a score strictly greater than this are skipped.
        count       -- initial value of the running record counter.
        """
        # ContentHandler is an old-style class on Python 2, so no super().
        ContentHandler.__init__(self)
        self.inWindowContent = 0
        self.inSeqNameContent = 0
        self.inRepContent = 0
        self.inStartContent = 0
        self.inEndContent = 0
        self.inPeriodContent = 0
        self.inUnitContent = 0
        self.inScoreContent = 0
        self.count = count
        self._outfileName = outfileName
        self.filter=filter
        # Text buffers filled by characters(); initialised here so that an
        # element met outside its usual parent does not raise AttributeError.
        self.seqname=""
        self.start=""
        self.end=""
        self.period=""
        self.unit=""
        self.score=""
        # Maps each <unit> text of the current repeat to its occurrence count.
        self.type={}

    def startDocument(self):
        """Open the output file for writing."""
        self._fileout = open(self._outfileName,"w")

    def startElement(self,name,attrs):
        """Raise the flag of the element being entered and reset its buffers."""
        if name=="window":
            self.inWindowContent=1
        elif name=="sequence-name":
            self.inSeqNameContent=1
            self.seqname=""
        elif name=="repeat":
            self.inRepContent=1
            self.start=""
            self.end=""
            self.period=""
            self.type={}
        elif name=="start":
            self.inStartContent=1
        elif name=="end":
            self.inEndContent=1
        elif name=="period":
            self.inPeriodContent=1
        elif name=="unit":
            self.inUnitContent=1
            self.unit=""
        elif name=="score":
            self.inScoreContent=1
            self.score=""

    def characters(self,ch):
        """Append character data to the buffer of the element currently open."""
        if self.inSeqNameContent:
            self.seqname+=ch
        elif self.inStartContent:
            self.start+=ch
        elif self.inEndContent:
            self.end+=ch
        elif self.inPeriodContent:
            self.period+=ch
        elif self.inUnitContent:
            self.unit+=ch
        elif self.inScoreContent:
            self.score+=ch

    def endElement(self,name):
        """Lower the flag of the element being left.

        Closing a <unit> counts its text; closing a <repeat> writes the record.
        """
        if name=="window":
            self.inWindowContent=0
        elif name=="sequence-name":
            self.inSeqNameContent=0
        elif name=="repeat":
            self.inRepContent=0
            self._writeRepeat()
        elif name=="start":
            self.inStartContent=0
        elif name=="end":
            self.inEndContent=0
        elif name=="period":
            self.inPeriodContent=0
        elif name=="score":
            self.inScoreContent=0
        elif name=="unit":
            self.inUnitContent=0
            # dict.has_key() does not exist on Python 3; get() works everywhere.
            self.type[self.unit]=self.type.get(self.unit,0)+1

    def _writeRepeat(self):
        """Write the output line of the repeat that has just been closed.

        Raises ValueError when start, end, period or score is not numeric, and
        ZeroDivisionError when the period is 0.
        """
        start=int(self.start)
        end=int(self.end)
        period=int(self.period)
        score=float(self.score)
        if score>self.filter:
            return
        self.count+=1

        # Most frequent unit; the first one met wins a tie. A repeat without
        # any <unit> keeps the empty string instead of raising a NameError.
        bestUnit=""
        bestCount=0
        for unit,occurrences in self.type.items():
            if occurrences>bestCount:
                bestCount=occurrences
                bestUnit=unit
        if len(bestUnit) > 100:
            bestUnit=bestUnit[:48]+"..."+bestUnit[-51:]

        m=self._cutPattern.match(self.seqname)
        if m is not None:
            # Drop the trailing "{Cut}" (5 characters) and the leading number.
            seqname=self.seqname[:m.end(0)-5].rstrip()
            seqname=self._leadingNumberPattern.sub("",seqname).lstrip()
            # What follows "{Cut}" is "<offset>..<end>"; shift the coordinates
            # by the 1-based offset of the chunk.
            tok=self.seqname[m.end(0):].split("..")
            offset=int(tok[0])-1
            astart=start+offset
            aend=end+offset
        else:
            astart=start
            aend=end
            seqname=self.seqname

        # "//" keeps the Python 2 integer division under Python 3 as well.
        copies=(abs(start-end)+1)//period
        strout="%d\t(%s)%d\t%s\t%d\t%d"%\
                (self.count,bestUnit,copies,seqname,astart,aend)
        self._fileout.write("%s\n"%(strout))

    def endDocument(self):
        """Close the output file."""
        self._fileout.close()