annotate calclenchange.py @ 39:e56023008e36 default tip

Changed revision of package_fisher_0_1_4 to be2fc454d121 Changed revision of package_matplotlib_1_2 to a03ee94316b5
author miller-lab
date Mon, 06 Jul 2015 10:32:24 -0400
parents 2c498d40ecde
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
1 #!/usr/bin/env python
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
2 # -*- coding: utf-8 -*-
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
3 #
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
4 # calclenchange.py
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
5 #
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
6 # Copyright 2011 Oscar Bedoya-Reina <oscar@niska.bx.psu.edu>
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
7 #
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
8 # This program is free software; you can redistribute it and/or modify
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
9 # it under the terms of the GNU General Public License as published by
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
10 # the Free Software Foundation; either version 2 of the License, or
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
11 # (at your option) any later version.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
12 #
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
13 # This program is distributed in the hope that it will be useful,
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
16 # GNU General Public License for more details.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
17 #
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
18 # You should have received a copy of the GNU General Public License
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
19 # along with this program; if not, write to the Free Software
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
21 # MA 02110-1301, USA.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
22
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
23 import argparse,mechanize,os,sys
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
24 from decimal import Decimal,getcontext
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
25 from xml.etree.ElementTree import ElementTree,tostring
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
26 import networkx as nx
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
27 from copy import copy
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
28
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
29 #method to rank the the pthways by mut. freq.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
30 def rankdN(ltfreqs):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
31 ordvals=sorted(ltfreqs)#sort and reverse freqs.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
32 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
33 outrnk=[]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
34 tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
35 if tmpOri=='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
36 if tmpMut!='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
37 tmpChng0='C-%s'%tmpMut
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
38 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
39 tmpChng0=Decimal('0')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
40 crank=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
41 outrnk.append([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
42 totalnvals=len(ordvals)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
43 cnt=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
44 while totalnvals>cnt:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
45 cnt+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
46 tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
47 if tmpOri=='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
48 if tmpMut!='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
49 tmpChng='C-%s'%tmpMut
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
50 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
51 tmpChng=Decimal('0')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
52 if tmpChng!=tmpChng0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
53 crank=len(outrnk)+1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
54 tmpChng0=tmpChng
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
55 outrnk.append([str(tmpChng),str(tmpOri),str(tmpMut),str(crank),tmpPthw])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
56 return outrnk
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
57
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
58 #method to rank the the pthways by mut. freq.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
59 def rankdAvr(ltfreqs):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
60 ordvals=sorted(ltfreqs)#sort and reverse freqs.
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
61 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
62 outrnk={}
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
63 tmpChng0,tmpOri,tmpMut,tmpPthw=ordvals.pop()#the highest possible value
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
64 if tmpOri=='I':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
65 if tmpMut!='I':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
66 tmpChng0='I-%s'%tmpMut
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
67 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
68 tmpChng0=Decimal('0')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
69 crank=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
70 outrnk[tmpPthw]='\t'.join([str(tmpChng0),str(tmpOri),str(tmpMut),str(crank)])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
71 totalnvals=len(ordvals)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
72 cnt=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
73 while totalnvals>cnt:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
74 cnt+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
75 tmpChng,tmpOri,tmpMut,tmpPthw=ordvals.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
76 if tmpOri=='I':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
77 if tmpMut!='I':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
78 tmpChng='I-%s'%tmpMut
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
79 else:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
80 tmpChng=Decimal('0')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
81 if tmpChng!=tmpChng0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
82 crank=len(outrnk)+1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
83 tmpChng0=tmpChng
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
84 outrnk[tmpPthw]='\t'.join([str(tmpChng),str(tmpOri),str(tmpMut),str(crank)])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
85 return outrnk
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
86
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
87 #this method takes as input a list of pairs of edges(beginNod,endNod) and returns a list of nodes with indegree 0 and outdegree 0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
88 def returnstartanendnodes(edges):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
89 listID0st=set()#starts
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
90 listOD0en=set()#end
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
91 for beginNod,endNod in edges:# O(n)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
92 listID0st.add(beginNod)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
93 listOD0en.add(endNod)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
94 startNdsID0=listID0st.difference(listOD0en)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
95 endNdsOD0=listOD0en.difference(listID0st)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
96 return startNdsID0,endNdsOD0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
97
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
98 #~ Method to return nodes and edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
99 def returnNodesNEdgesfKXML(fpthwKGXML):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
100 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
101 tree = ElementTree()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
102 ptree=tree.parse(fpthwKGXML)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
103 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
104 title=ptree.get('title')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
105 prots=ptree.findall('entry')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
106 reactns=ptree.findall('reaction')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
107 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
108 edges,ndstmp=set(),set()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
109 nreactns=len(reactns)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
110 cr=0#count reacts
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
111 while nreactns>cr:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
112 cr+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
113 reactn=reactns.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
114 mainid=reactn.get('id')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
115 ndstmp.add(mainid)#add node
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
116 reacttyp=reactn.get('type')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
117 sbstrts=reactn.findall('substrate')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
118 while len(sbstrts)>0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
119 csbstrt=sbstrts.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
120 csbtsid=csbstrt.get('id')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
121 ndstmp.add(csbtsid)#add node
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
122 if reacttyp=='irreversible':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
123 edges.add((csbtsid,mainid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
124 elif reacttyp=='reversible':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
125 edges.add((mainid,csbtsid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
126 edges.add((csbtsid,mainid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
127 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
128 prdcts=reactn.findall('product')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
129 while len(prdcts)>0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
130 prdct=prdcts.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
131 prodctid=prdct.get('id')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
132 ndstmp.add(prodctid)#add node
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
133 if reacttyp=='irreversible':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
134 edges.add((mainid,prodctid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
135 elif reacttyp=='reversible':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
136 edges.add((mainid,prodctid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
137 edges.add((prodctid,mainid))#add edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
138 #~ Nodes
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
139 nprots=len(prots)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
140 cp=0#count prots
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
141 dnodes={}
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
142 while nprots>cp:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
143 cp+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
144 prot=prots.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
145 tmpProtnm=prot.get('id')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
146 if tmpProtnm in ndstmp:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
147 dnodes[prot.get('id')]=set(prot.get('name').split())#each genename for each Id
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
148 return dnodes,edges,title
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
149
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
150 #~ make calculation on pathways
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
151 def rtrnAvrgLen(edges,strNds,endNds):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
152 wG=nx.DiGraph()#reference graph
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
153 wG.add_edges_from(edges)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
154 dPairsSrcSnks=nx.all_pairs_shortest_path_length(wG)#dictionary between sources and sink and length
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
155 nstartNdsID0=len(strNds)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
156 cstrtNds=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
157 nPaths=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
158 lPathLen=[]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
159 while nstartNdsID0>cstrtNds:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
160 cStartNd=strNds.pop()#current start node
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
161 dEndNdsLen=dPairsSrcSnks.pop(cStartNd)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
162 for cendNd in dEndNdsLen:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
163 if cendNd in endNds:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
164 lPathLen.append(dEndNdsLen[cendNd])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
165 nPaths+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
166 cstrtNds+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
167 AvrgPthLen=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
168 if nPaths!=0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
169 AvrgPthLen=Decimal(sum(lPathLen))/Decimal(str(nPaths))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
170 return nPaths,AvrgPthLen
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
171
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
172 def main():
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
173 parser = argparse.ArgumentParser(description='Rank pathways based on the change in length and number of paths connecting sources and sinks.')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
174 parser.add_argument('--loc_file',metavar='correlational database',type=str,help='correlational database')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
175 parser.add_argument('--species',metavar='species name',type=str,help='the species of interest in loc_file')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
176 parser.add_argument('--output',metavar='output TXT file',type=str,help='the output file with the table in txt format. Column 1 is the diference between column 2 and column 3, Column 2 is the pathway average length (between sources and sinks) including the genes in the input list, Column 3 is the pathway average length EXCLUDING the genes in the input list, Column 4 is the rank based on column 1. Column 5 is the diference between column 6 and column 7, Column 6 is the number of paths between sources and sinks, including the genes in the input list, Column 7 is the number of paths between sources and sinks EXCLUDING the genes in the input list, Column 8 is the rank based on column 5. Column 9 I the pathway name' )
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
177 parser.add_argument('--posKEGGclmn',metavar='column number',type=int,help='the column with the KEGG pathway code/name')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
178 parser.add_argument('--KEGGgeneposcolmn',metavar='column number',type=int,help='column with the KEGG gene code')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
179 parser.add_argument('--input',metavar='input TXT file',type=str,help='the input file with the table in txt format')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
180 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
181 #~Open arguments
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
182 class C(object):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
183 pass
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
184 fulargs=C()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
185 parser.parse_args(sys.argv[1:],namespace=fulargs)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
186 #test input vars
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
187 inputf,loc_file,species,output,posKEGGclmn,Kgeneposcolmn=fulargs.input,fulargs.loc_file,fulargs.species,fulargs.output,fulargs.posKEGGclmn,fulargs.KEGGgeneposcolmn
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
188 posKEGGclmn-=1#correct pos
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
189 Kgeneposcolmn-=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
190 #~ Get the extra variables
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
191 crDB=[x.split() for x in open(loc_file).read().splitlines() if x.split()[0]==species][0]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
192 sppPrefx,dinput=crDB[1],crDB[2]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
193 #~ set decimal positions
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
194 getcontext().prec = 3
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
195 #make a dictionary of valid genes
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
196 dKEGGcPthws=dict([(x.split('\t')[Kgeneposcolmn],set([y.split('=')[0] for y in x.split('\t')[posKEGGclmn].split('.')])) for x in open(inputf).read().splitlines()[1:] if x.strip()])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
197 sdGenes=set([x for x in dKEGGcPthws.keys() if x.find('.')>-1])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
198 while True:#to crrect names with more than one gene
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
199 try:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
200 mgenes=sdGenes.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
201 pthwsAssotd=dKEGGcPthws.pop(mgenes)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
202 mgenes=mgenes.split('.')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
203 for eachg in mgenes:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
204 dKEGGcPthws[eachg]=pthwsAssotd
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
205 except:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
206 break
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
207 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
208 lPthwsF=[x for x in os.listdir(dinput) if x.find('.xml')>-1 if x not in ['cfa04070.xml']]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
209 nPthws=len(lPthwsF)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
210 cPthw=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
211 lPthwPthN=[]#the output list for number of paths
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
212 lPthwPthAvr=[]#the output list for the length of paths
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
213 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
214 while cPthw<nPthws:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
215 cPthw+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
216 KEGGpathw=lPthwsF.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
217 comdKEGGpathw=KEGGpathw.split('.')[0]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
218 tmpddGenrcgenPresent=set()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
219 sKEGGc=dKEGGcPthws.keys()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
220 lsKEGGc=len(sKEGGc)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
221 ctPthw=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
222 while ctPthw < lsKEGGc:#to save memory
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
223 eachK=sKEGGc.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
224 alPthws=dKEGGcPthws[eachK]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
225 if comdKEGGpathw in alPthws:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
226 tmpddGenrcgenPresent.add(':'.join([sppPrefx,eachK]))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
227 ctPthw+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
228 #~ Make graph calculations
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
229 dnodes,edges,title=returnNodesNEdgesfKXML(open(os.path.join(dinput,KEGGpathw)))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
230 startNdsID0,endNdsOD0=returnstartanendnodes(edges)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
231 startNdsOri=copy(startNdsID0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
232 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
233 nPaths='C'#stands for circuit
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
234 AvrgPthLen='I'#stand for infinite
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
235 if len(startNdsID0)>0 and len(endNdsOD0)>0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
236 nPaths,AvrgPthLen=rtrnAvrgLen(edges,startNdsID0,endNdsOD0)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
237 #~ work with the genes in the list
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
238 genestodel=set()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
239 lnodes=len(dnodes)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
240 sNds=set(dnodes)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
241 ctPthw=0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
242 while ctPthw<lnodes:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
243 ctPthw+=1
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
244 cNod=sNds.pop()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
245 sgenes=dnodes.pop(cNod)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
246 if len(sgenes.intersection(tmpddGenrcgenPresent))==len(sgenes):
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
247 genestodel.add(cNod)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
248 #~ del nodes from graph edges
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
249 wnPaths,wAvrgPthLen=copy(nPaths),copy(AvrgPthLen)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
250 if len(genestodel)>0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
251 wedges=set([x for x in edges if len(set(x).intersection(genestodel))==0])
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
252 wstartNds,wendNds=returnstartanendnodes(wedges)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
253 if nPaths!='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
254 wstartNds=[x for x in wstartNds if x in startNdsOri]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
255 wendNds=[x for x in wendNds if x in endNdsOD0]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
256 if len(wstartNds)>0 and len(wendNds)>0:
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
257 wnPaths,wAvrgPthLen=rtrnAvrgLen(wedges,wstartNds,wendNds)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
258 #~ Calculate the differences
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
259 orNP,mutNP,oriLen,mutLen=nPaths,wnPaths,AvrgPthLen,wAvrgPthLen
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
260 if nPaths=='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
261 orNP=Decimal('1000')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
262 oriLen=Decimal('1000')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
263 if wnPaths=='C':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
264 mutNP=Decimal('1000')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
265 mutLen=Decimal('1000')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
266 lPthwPthN.append([orNP-mutNP,nPaths,wnPaths,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
267 lPthwPthAvr.append([oriLen-mutLen,AvrgPthLen,wAvrgPthLen,'='.join([comdKEGGpathw,title])])#print nPaths,AvrgPthLen
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
268 doutrnkPthN=rankdN(lPthwPthN)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
269 doutrnkPthAvr=rankdAvr(lPthwPthAvr)
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
270 #~
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
271 sall=['\t'.join([doutrnkPthAvr[x[4]],'\t'.join(x)]) for x in doutrnkPthN]
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
272 salef=open(output,'w')
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
273 salef.write('\n'.join(sall))
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
274 salef.close()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
275 return 0
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
276
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
277
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
278 if __name__ == '__main__':
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
279 main()
2c498d40ecde Uploaded
miller-lab
parents:
diff changeset
280