comparison tools/rgenetics/listFiles.py @ 0:9071e359b9a3

Uploaded
author xuebing
date Fri, 09 Mar 2012 19:37:19 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:9071e359b9a3
1 #Provides Upload tool with access to list of available files
2 import glob,sys
3 import galaxy.app as thisapp
4 import galaxy.util
5
6 from elementtree.ElementTree import XML
7
# Directory roots globbed by the file-listing helpers in this module.
# librepos is searched by every get_lib_* helper.
librepos = '/usr/local/galaxy/data/rg'
# myrepos is searched by the get_my_* helpers and also by most get_lib_* ones.
myrepos = '/home/rerla/galaxy'
# Directory handed out by get_marchini_dir(); a subdirectory of librepos.
marchinirepos = '%s/snptest' % librepos
11
12 from galaxy.tools.parameters import DataToolParameter
13
14 #Provides Upload tool with access to list of available builds
15
# One (build_name, dbkey, selected) option per entry of galaxy.util.dbnames;
# nothing is marked as selected at import time.
builds = [(build_name, dbkey, False) for dbkey, build_name in galaxy.util.dbnames]
20
21 #Return available builds
def get_available_builds(defval='hg18'):
    """Return the genome build select options with ``defval`` preselected.

    Each option is a ``(build_name, dbkey, selected)`` tuple held in the
    module-level ``builds`` list. The ``selected`` flag of every option is
    recomputed on each call, so only options whose dbkey equals ``defval``
    are selected; a default chosen by an earlier call does not stay selected.

    The module-level list is updated in place and that same list is returned.
    """
    for i, (build_name, dbkey, _selected) in enumerate(builds):
        builds[i] = (build_name, dbkey, dbkey == defval)
    return builds
29
30
31
def get_tabular_cols( input, outformat='gg' ):
    """Build the column select options for a tab-separated dataset.

    Derived from galaxy tool source around August 2007 by Ross.

    input     -- object providing ``file_name`` (path of the tabular file)
                 and ``metadata.columns`` (number of columns to consider).
    outformat -- 'gg' (strict genome graphs): offer only column 1 plus the
                 columns in which a numeric value was seen. Any other value
                 offers every column.

    Returns a list of ``(name, column_number_as_string, selected)`` tuples in
    order of first appearance, with the first five (at most) selected.

    Lines starting with '#' are skipped. Non-numeric, non-empty fields on the
    very first line of the file are used as column names; other columns are
    called 'ColN'. A name is offered only once, even if several columns share
    it. Reading stops after the first data line past line index 10 once at
    least one option exists, and in any case at line index 30.
    """
    ncols = input.metadata.columns
    strict = (outformat == 'gg')
    # Slot 0 is unused so that colnames[col] lines up with 1-based columns.
    colnames = ['Col%d' % x for x in range(ncols + 1)]
    columns = []
    seen = set()
    # open() instead of the Python 2 only file() builtin; closed explicitly
    # so the handle is released even though we normally stop reading early.
    handle = open( input.file_name )
    try:
        for i, line in enumerate( handle ):
            if not line.startswith( '#' ):
                elems = line.rstrip( '\r\n' ).split( '\t' )
                for col in range( 1, ncols + 1 ):
                    # A row may hold fewer fields than the metadata claims;
                    # treat the missing ones as empty rather than raising.
                    if col <= len( elems ):
                        val = elems[col - 1]
                    else:
                        val = ''
                    try:
                        float( val )
                        isFloat = True
                    except ValueError:
                        isFloat = False
                        if val and i == 0: # header row
                            colnames[col] = val
                    if isFloat or (not strict) or (col == 1): # all in if not GG
                        option = colnames[col]
                        if option not in seen: # new
                            seen.add( option )
                            columns.append( (option, str( col ), False) )
                if columns and i > 10:
                    # The select list is built, so stop scanning the file.
                    break
            if i == 30:
                break # hard cap on the number of lines inspected
    finally:
        handle.close()
    # Preselect the leading options (up to five).
    for pos in range( min( 5, len( columns ) ) ):
        (name, value, _selected) = columns[pos]
        columns[pos] = (name, value, True)
    return columns
79
def get_marchini_dir():
    """Return the directory configured for snptest (Marchini) style files.

    This is simply the module-level ``marchinirepos`` setting.
    """
    snptest_dir = marchinirepos
    return snptest_dir
83
84
def get_lib_SNPTESTCaCofiles():
    """Return select options for the case/control file sets in the snptest directory.

    A file set is recognised by its ``<name>.gen_1`` member (sets are expected
    to share a file name with both _1 and _2 suffixes; only the _1 member is
    looked for here). Each option is ``(path_without_suffix,
    path_without_suffix, selected)``, sorted by path with the first option
    selected. When nothing is found a single selected placeholder option with
    value 'None' explains how to create some.
    """
    d = get_marchini_dir()
    testsuffix = '.gen_1' # glob these
    # Every match ends with testsuffix, so slice it off the end; splitting on
    # it would truncate any path that contains '.gen_1' earlier on.
    flist = [x[:-len(testsuffix)] for x in glob.glob('%s/*%s' % (d, testsuffix))]
    if not flist:
        return [('No Marchini CaCo files found in %s - convert some using the Marchini converter tool' % d,'None',True),]
    # glob order is arbitrary; sort so the preselected first entry is stable.
    flist.sort()
    return [(x, x, i == 0) for i, x in enumerate(flist)]
98
def getChropt():
    """Return the chromosome select options.

    The options are X, Y and then 1 to 22, each as ``(name, name, selected)``.
    The option at index 3 (chromosome '2') is the only one selected.
    """
    names = ['X', 'Y'] + [str(n) for n in range(1, 23)]
    preselected = 3
    return [(name, name, pos == preselected) for pos, name in enumerate(names)]
108
109
def get_phecols(fname=''):
    """Return one select option per column named on the first line of ``fname``.

    The first line is split on whitespace and every word becomes an
    unselected ``(name, name, False)`` option for a multi-select list.

    If the file cannot be opened or read, or has no first line at all, a
    single placeholder option naming the file (value 'None') is returned
    instead of raising.
    """
    try:
        # open() rather than the Python 2 only file() builtin; the handle is
        # closed as soon as the header line has been read.
        handle = open(fname, 'r')
        try:
            header_line = handle.readline()
        finally:
            handle.close()
    except (IOError, OSError, TypeError, ValueError):
        header_line = ''
    if not header_line:
        return [('get_phecols unable to open file %s' % fname,'None',False),]
    return [(x, x, False) for x in header_line.split()]
123
124 #Return various kinds of files
125
def get_lib_pedfiles():
    """Return select options for the ped files under ``<repo>/ped``.

    Both ``librepos`` and ``myrepos`` are searched. Each option is
    ``(path_without_ext, path_without_ext, True)``, sorted by path. When no
    file is found a single selected placeholder option with value 'None'
    names the two directories that were searched.
    """
    ext = '.ped'
    dat = glob.glob('%s/ped/*%s' % (librepos, ext))
    dat += glob.glob('%s/ped/*%s' % (myrepos, ext))
    if not dat:
        return [('No ped files - add some to %s/ped or %s/ped' % (librepos,myrepos),'None',True),]
    dat.sort()
    # Every match ends with ext, so slice it off the end; splitting on it
    # would truncate any path that contains '.ped' earlier on.
    dat = [x[:-len(ext)] for x in dat]
    # The selected flag is a real boolean (the code used the string 'True').
    return [(x, x, True) for x in dat]
136
def get_lib_phefiles():
    """Return select options for the phenotype files under ``<repo>/pheno``.

    Both ``librepos`` and ``myrepos`` are searched for ``*.phe``. Each option
    is ``(path, path, False)``, sorted by path. When no file is found a single
    selected placeholder option with value 'None' names the two directories
    that were searched.
    """
    ext = 'phe'
    dat = glob.glob('%s/pheno/*.%s' % (librepos,ext))
    dat += glob.glob('%s/pheno/*.%s' % (myrepos,ext))
    if not dat:
        return [('No %s files - add some to %s/pheno or %s/pheno' % (ext,librepos,myrepos),'None',True),]
    dat.sort()
    # The flag must be the boolean False: the string 'False' used before is
    # truthy, which marked every file as selected.
    return [(x, x, False) for x in dat]
147
def get_lib_bedfiles():
    """Return select options for the bed files under ``<repo>/plinkbed``.

    Both ``librepos`` and ``myrepos`` are searched. Each option is
    ``(path_without_ext, path_without_ext, False)``, sorted by path. When no
    file is found a single selected placeholder option with value 'None'
    names the two directories that were searched.
    """
    ext = '.bed'
    dat = glob.glob('%s/plinkbed/*%s' % (librepos, ext))
    dat += glob.glob('%s/plinkbed/*%s' % (myrepos, ext))
    if not dat:
        return [('No bed files - Please import some to %s/plinkbed or %s/plinkbed' % (librepos,myrepos),'None',True),]
    dat.sort()
    # Every match ends with ext, so slice it off the end; splitting on it
    # would truncate any path that contains '.bed' earlier on.
    dat = [x[:-len(ext)] for x in dat]
    return [(x, x, False) for x in dat]
158
def get_lib_fbatfiles():
    """Return select options for the ``*.ped`` files under ``<repo>/plinkfbat``.

    ``librepos`` and ``myrepos`` are both searched. Each option is
    ``(path, path, False)``, sorted by path; with no matches a single selected
    placeholder option with value 'None' is returned.
    """
    found = []
    for repo in (librepos, myrepos):
        found.extend(glob.glob('%s/plinkfbat/*.ped' % repo))
    if not found:
        return [('No fbat bed files - Please import some to %s/plinkfbat or %s/plinkfbat' % (librepos,myrepos),'None',True),]
    return [(path, path, False) for path in sorted(found)]
168
def get_lib_mapfiles():
    """Return select options for the ``*.map`` files under ``<repo>/ped``.

    ``librepos`` and ``myrepos`` are both searched. Each option is
    ``(path, path, False)``, sorted by path; with no matches a single selected
    placeholder option with value 'None' is returned.
    """
    found = []
    for repo in (librepos, myrepos):
        found.extend(glob.glob('%s/ped/*.map' % repo))
    if not found:
        return [('No map files - add some to %s/ped' % librepos,'None',True),]
    return [(path, path, False) for path in sorted(found)]
178
def get_my_pedfiles():
    """Return select options for the ``*.ped`` files directly inside ``myrepos``.

    Each option is ``(path, path, False)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it names
    ``myrepos``, the directory actually searched.
    """
    dat = glob.glob('%s/*.ped' % myrepos)
    if not dat:
        return [('No ped files - add some to %s' % myrepos,'None',True),]
    return [(x, x, False) for x in dat]
186
def get_my_mapfiles():
    """Return select options for the ``*.map`` files directly inside ``myrepos``.

    Each option is ``(path, path, True)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it
    refers to map files and to ``myrepos``, the directory actually searched.
    """
    dat = glob.glob('%s/*.map' % myrepos)
    if not dat:
        return [('No map files - add some to %s' % myrepos,'None',True),]
    # The selected flag is a real boolean (the code used the string 'True').
    return [(x, x, True) for x in dat]
194
def get_lib_xlsfiles():
    """Return select options for the ``*.xls`` files directly inside ``librepos``.

    Each option is ``(path, path, False)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it
    refers to xls files, the kind actually searched for.
    """
    dat = glob.glob('%s/*.xls' % librepos)
    if not dat:
        return [('No xls files - add some to %s' % librepos,'None',True),]
    return [(x, x, False) for x in dat]
202
def get_lib_htmlfiles():
    """Return select options for the ``*.html`` files directly inside ``librepos``.

    Each option is ``(path, path, False)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it
    refers to html files, the kind actually searched for.
    """
    dat = glob.glob('%s/*.html' % librepos)
    if not dat:
        return [('No html files - add some to %s' % librepos,'None',True),]
    return [(x, x, False) for x in dat]
210
def get_my_xlsfiles():
    """Return select options for the ``*.xls`` files directly inside ``myrepos``.

    Each option is ``(path, path, False)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it
    refers to xls files and to ``myrepos``, the directory actually searched.
    """
    dat = glob.glob('%s/*.xls' % myrepos)
    if not dat:
        return [('No xls files - add some to %s' % myrepos,'None',True),]
    return [(x, x, False) for x in dat]
218
def get_my_htmlfiles():
    """Return select options for the ``*.html`` files directly inside ``myrepos``.

    Each option is ``(path, path, False)`` in glob order. With no matches a
    single selected placeholder option with value 'None' is returned; it
    refers to html files and to ``myrepos``, the directory actually searched.
    """
    dat = glob.glob('%s/*.html' % myrepos)
    if not dat:
        return [('No html files - add some to %s' % myrepos,'None',True),]
    return [(x, x, False) for x in dat]
226
227