annotate tools/rgenetics/rgPedSub.py @ 1:cdcb0ce84a1b

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:45:15 -0500
parents 9071e359b9a3
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
1 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
2 July 1 2009 added relatedness filter - fo/oo or all
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
3 released under the terms of the LGPL
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
4 copyright ross lazarus August 2007
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
5 for the rgenetics project
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
6
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
7 Special galaxy tool for the camp2007 data
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
8 Allows grabbing genotypes from an arbitrary region
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
9
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
10 Needs a mongo results file in the location hardwired below or could be passed in as
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
11 a library parameter - but this file must have a very specific structure
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
12 rs chrom offset float1...floatn
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
13
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
14 called as
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
15
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
16 <command interpreter="python2.4">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
17 campRGeno2.py $region "$rslist" "$title" $output1 $log_file $userId "$lpedIn" "$lhistIn"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
18 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
19
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
20
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
21 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
22
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
23
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
24 import sys, array, os, string
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
25 from rgutils import galhtmlprefix,plinke,readMap
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
26
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
27 progname = os.path.split(sys.argv[0])[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
28
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
29
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
30 atrandic = {'A':'1','C':'2','G':'3','T':'4','N':'0','-':'0','1':'1','2':'2','3':'3','4':'4','0':'0'}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
31
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
32 def doImport(outfile='test',flist=[]):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
33 """ import into one of the new html composite data types for Rgenetics
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
34 Dan Blankenberg with mods by Ross Lazarus
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
35 October 2007
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
36 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
37 out = open(outfile,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
38 out.write(galhtmlprefix % progname)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
39
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
40 if len(flist) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
41 out.write('<ol>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
42 for i, data in enumerate( flist ):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
43 out.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1]))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
44 out.write('</ol>\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
45 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
46 out.write('No files found')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
47 out.write("</div></body></html>")
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
48 out.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
49
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
50 def setupPedFilter(relfilter='oo',dfile=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
51 """ figure out who to drop to satisfy relative filtering
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
52 note single offspring only from each family id
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
53 ordering of pdict keys makes this 'random' as the first one only is
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
54 kept if there are multiple sibs from same familyid.
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
55 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
56 dropId = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
57 keepoff = (relfilter == 'oo')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
58 keepfounder = (relfilter == 'fo')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
59 pdict = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
60 for row in dfile:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
61 rowl = row.strip().split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
62 if len(rowl) > 6:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
63 idk = (rowl[0],rowl[1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
64 pa = (rowl[0],rowl[2]) # key for father
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
65 ma = (rowl[0],rowl[3]) # and mother
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
66 pdict[idk] = (pa,ma)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
67 dfile.seek(0) # rewind
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
68 pk = pdict.keys()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
69 for p in pk:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
70 parents = pdict[p]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
71 if pdict.get(parents[0],None) or pdict.get(parents[1],None): # parents are in this file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
72 if keepfounder:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
73 dropId[p] = 1 # flag for removal
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
74 elif keepoff:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
75 dropId[p] = 1 # flag for removal
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
76 if keepoff: # TODO keep only a random offspring if many - rely on pdict keys being randomly ordered...!
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
77 famseen = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
78 for p in pk: # look for multiples from same family - drop all but first
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
79 famid = p[0]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
80 if famseen.get(famid,None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
81 dropId[p] = 1 # already got one from this family
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
82 famseen.setdefault(famid,1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
83 return dropId
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
84
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
85 def writeFped(rslist=[],outdir=None,title='Title',basename='',dfile=None,wewant=[],dropId={},outfile=None,logfile=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
86 """ fbat format version
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
87 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
88 outname = os.path.join(outdir,basename)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
89 pedfname = '%s.ped' % outname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
90 ofile = file(pedfname, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
91 rsl = ' '.join(rslist) # rslist for fbat
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
92 ofile.write(rsl)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
93 s = 'wrote %d marker header to %s - %s\n' % (len(rslist),pedfname,rsl[:50])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
94 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
95 ofile.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
96 nrows = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
97 for line in dfile:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
98 line = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
99 if not line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
100 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
101 line = line.replace('D','N')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
102 fields = line.split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
103 preamble = fields[:6]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
104 idk = (preamble[0],preamble[1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
105 dropme = dropId.get(idk,None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
106 if not dropme:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
107 g = ['%s %s' % (fields[snpcol], fields[snpcol+1]) for snpcol in wewant]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
108 g = ' '.join(g)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
109 g = g.split() # we'll get there
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
110 g = [atrandic.get(x,'0') for x in g] # numeric alleles...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
111 # hack for framingham ND
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
112 ofile.write('%s %s\n' % (' '.join(preamble), ' '.join(g)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
113 nrows += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
114 ofile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
115 loglist = open(logfile,'r').readlines() # retrieve log to add to html file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
116 doImport(outfile,[pedfname,],loglist=loglist)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
117 return nrows,pedfname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
118
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
119 def writePed(markers=[],outdir=None,title='Title',basename='',dfile=None,wewant=[],dropId={},outfile=None,logfile=None):
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
120 """ split out
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
121 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
122 outname = os.path.join(outdir,basename)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
123 mapfname = '%s.map' % outname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
124 pedfname = '%s.ped' % outname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
125 ofile = file(pedfname, 'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
126 # make a map file in the lped library
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
127 mf = file(mapfname,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
128 map = ['%s\t%s\t0\t%d' % (x[0],x[2],x[1]) for x in markers] # chrom,abspos,snp in genomic order
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
129 mf.write('%s\n' % '\n'.join(map))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
130 mf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
131 nrows = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
132 for line in dfile:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
133 line = line.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
134 if not line:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
135 continue
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
136 #line = line.replace('D','N')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
137 fields = line.split()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
138 preamble = fields[:6]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
139 idk = (preamble[0],preamble[1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
140 dropme = dropId.get(idk,None)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
141 if not dropme:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
142 g = ['%s %s' % (fields[snpcol], fields[snpcol+1]) for snpcol in wewant]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
143 g = ' '.join(g)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
144 g = g.split() # we'll get there
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
145 g = [atrandic.get(x,'0') for x in g] # numeric alleles...
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
146 # hack for framingham ND
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
147 ofile.write('%s %s\n' % (' '.join(preamble), ' '.join(g)))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
148 nrows += 1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
149 ofile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
150 loglist = open(logfile,'r').readlines() # retrieve log to add to html file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
151 doImport(outfile,[mapfname,pedfname,logfile])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
152 return nrows,pedfname
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
153
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
154 def subset():
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
155 """ ### Sanity check the arguments
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
156 now passed in as
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
157 <command interpreter="python">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
158 rgPedSub.py $script_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
159 </command>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
160
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
161 with
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
162 <configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
163 <configfile name="script_file">
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
164 title~~~~$title
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
165 output1~~~~$output1
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
166 log_file~~~~$log_file
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
167 userId~~~~$userId
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
168 outformat~~~~$outformat
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
169 outdir~~~~$output1.extra_files_path
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
170 relfilter~~~~$relfilter
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
171 #if $d.source=='library'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
172 inped~~~~$d.lpedIn
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
173 #else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
174 inped~~~~$d.lhistIn.extra_files_path/$d.lhistIn.metadata.base_name
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
175 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
176 #if $m.mtype=='grslist'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
177 rslist~~~~$m.rslist
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
178 region~~~~
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
179 #else
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
180 rslist~~~~
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
181 region~~~~$m.region
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
182 #end if
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
183 </configfile>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
184 </configfiles>
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
185 """
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
186 sep = '~~~~' # arbitrary choice
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
187 conf = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
188 if len(sys.argv) < 2:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
189 print >> sys.stderr, "No configuration file passed as a parameter - cannot run"
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
190 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
191 configf = sys.argv[1]
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
192 config = file(configf,'r').readlines()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
193 for row in config:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
194 row = row.strip()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
195 if len(row) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
196 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
197 key,valu = row.split(sep)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
198 conf[key] = valu
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
199 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
200 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
201 ss = '%s%s' % (string.punctuation,string.whitespace)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
202 ptran = string.maketrans(ss,'_'*len(ss))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
203 ### Figure out what genomic region we are interested in
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
204 region = conf.get('region','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
205 orslist = conf.get('rslist','').replace('X',' ').lower()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
206 orslist = orslist.replace(',',' ').lower()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
207 # galaxy replaces newlines with XX - go figure
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
208 title = conf.get('title','').translate(ptran) # for outputs
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
209 outfile = conf.get('output1','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
210 outdir = conf.get('outdir','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
211 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
212 os.makedirs(outdir)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
213 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
214 pass
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
215 outformat = conf.get('outformat','lped')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
216 basename = conf.get('basename',title)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
217 logfile = os.path.join(outdir,'%s.log' % title)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
218 userId = conf.get('userId','') # for library
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
219 pedFileBase = conf.get('inped','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
220 relfilter = conf.get('relfilter','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
221 MAP_FILE = '%s.map' % pedFileBase
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
222 DATA_FILE = '%s.ped' % pedFileBase
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
223 title = conf.get('title','lped subset')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
224 lf = file(logfile,'w')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
225 lf.write('config file %s = \n' % configf)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
226 lf.write(''.join(config))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
227 c = ''
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
228 spos = epos = 0
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
229 rslist = []
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
230 rsdict = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
231 if region > '':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
232 try: # TODO make a regexp?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
233 c,rest = region.split(':')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
234 c = c.replace('chr','')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
235 rest = rest.replace(',','') # remove commas
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
236 spos,epos = rest.split('-')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
237 spos = int(spos)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
238 epos = int(epos)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
239 s = '## %s parsing chrom %s from %d to %d\n' % (progname,c,spos,epos)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
240 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
241 except:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
242 s = '##! %s unable to parse region %s - MUST look like "chr8:10,000-100,000\n' % (progname,region)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
243 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
244 lf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
245 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
246 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
247 rslist = orslist.split() # galaxy replaces newlines with XX - go figure
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
248 rsdict = dict(zip(rslist,rslist))
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
249 allmarkers = False
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
250 if len(rslist) == 0 and epos == 0: # must be a full extract - presumably remove relateds or something
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
251 allmarkers = True
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
252 ### Figure out which markers are in this region
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
253 markers,snpcols,rslist,rsdict = readMap(mapfile=MAP_FILE,allmarkers=allmarkers,rsdict=rsdict,c=c,spos=spos,epos=epos)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
254 if len(rslist) == 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
255 s = '##! %s found no rs numbers in %s\n' % (progname,sys.argv[1:3])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
256 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
257 lf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
258 lf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
259 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
260 s = '## %s looking for %d rs (%s....etc)\n' % (progname,len(rslist),rslist[:5])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
261 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
262 try:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
263 dfile = open(DATA_FILE, 'r')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
264 except: # bad input file name?
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
265 s = '##! rgPedSub unable to open file %s\n' % (DATA_FILE)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
266 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
267 lf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
268 lf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
269 print >> sys.stdout, s
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
270 raise
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
271 sys.exit(1)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
272 if relfilter <> 'all': # must read pedigree and figure out who to drop
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
273 dropId = setupPedFilter(relfilter=relfilter,dfile=dfile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
274 else:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
275 dropId = {}
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
276 wewant = [(6+(2*snpcols[x])) for x in rslist] #
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
277 # column indices of first geno of each marker pair to get the markers into genomic
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
278 ### ... and then parse the rest of the ped file to pull out
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
279 ### the genotypes for all subjects for those markers
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
280 # /usr/local/galaxy/data/rg/1/lped/
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
281 if len(dropId.keys()) > 0:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
282 s = '## dropped the following subjects to satisfy requirement that relfilter = %s\n' % relfilter
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
283 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
284 if relfilter == 'oo':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
285 s = '## note that one random offspring from each family was kept if there were multiple offspring\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
286 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
287 s = 'FamilyId\tSubjectId\n'
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
288 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
289 dk = dropId.keys()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
290 dk.sort()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
291 for k in dk:
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
292 s = '%s\t%s\n' % (k[0],k[1])
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
293 lf.write(s)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
294 lf.write('\n')
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
295 lf.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
296 if outformat == 'lped':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
297 nrows,pedfname=writePed(markers=markers,outdir=outdir,title=title,basename=basename,dfile=dfile,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
298 wewant=wewant,dropId=dropId,outfile=outfile,logfile=logfile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
299 elif outformat == 'fped':
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
300 nrows,pedfname=writeFped(rslist=rslist,outdir=outdir,title=title,basename=basename,dfile=dfile,
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
301 wewant=wewant,dropId=dropId,outfile=outfile,logfile=logfile)
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
302 dfile.close()
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
303
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
304 if __name__ == "__main__":
9071e359b9a3 Uploaded
xuebing
parents:
diff changeset
305 subset()