0
|
1 #Provides Upload tool with access to list of available files
|
|
2 import glob,sys
|
|
3 import galaxy.app as thisapp
|
|
4 import galaxy.util
|
|
5
|
|
6 from elementtree.ElementTree import XML
|
|
7
|
|
# Root directories that the option-provider functions below glob for data files.
librepos = '/usr/local/galaxy/data/rg'  # shared library repository
myrepos = '/home/rerla/galaxy'  # personal repository
# snptest-style files live in a subdirectory of the shared library repository.
marchinirepos = '%s/snptest' % librepos
|
|
11
|
|
12 from galaxy.tools.parameters import DataToolParameter
|
|
13
|
|
14 #Provides Upload tool with access to list of available builds
|
|
15
|
|
# Module-level list of (build_name, dbkey, selected) tuples, one per entry of
# galaxy.util.dbnames; every entry starts out unselected.
builds = []
# Read build names and keys from galaxy.util
for dbkey, build_name in galaxy.util.dbnames:
    builds.append((build_name, dbkey, False))
|
|
20
|
|
21 #Return available builds
|
|
def get_available_builds(defval='hg18'):
    """Return the available builds with the one keyed ``defval`` selected.

    Each returned item is a (build_name, dbkey, selected) tuple built from the
    module-level ``builds`` list. A fresh list is returned on every call and
    ``builds`` itself is left untouched: the previous in-place update made
    every default ever requested stay selected on later calls and handed the
    shared list to callers.

    defval -- dbkey of the build to mark as selected (default 'hg18'); if no
              build has that key, nothing is selected.
    """
    return [(name, dbkey, dbkey == defval) for name, dbkey, _ in builds]
|
|
29
|
|
30
|
|
31
|
|
def get_tabular_cols( input, outformat='gg' ):
    """Return column select options for a tab-delimited dataset.

    Numeric only other than rs for strict genome graphs, otherwise tabular.
    Derived from galaxy tool source around August 2007 by Ross.

    input     -- object exposing ``metadata.columns`` (number of columns) and
                 ``file_name`` (path of the tab-delimited file to scan).
    outformat -- 'gg' (default) selects strict mode: only columns holding a
                 numeric value are offered, plus column 1, which must be a
                 marker. Any other value offers every column.

    Returns a list of (name, column_number_as_string, selected) tuples in
    order of first appearance; the first five are marked selected. Names
    default to 'Col<n>' and are replaced by non-numeric cells of the first
    line of the file. Lines starting with '#' are skipped and scanning stops
    once options exist and more than ten lines were read, or at line index 30.

    Rows with fewer fields than ``metadata.columns`` (including blank lines)
    are treated as having empty trailing cells instead of raising IndexError.
    """
    ncols = input.metadata.columns
    colnames = ['Col%d' % x for x in range(ncols + 1)]
    strict = (outformat == 'gg')
    columns = []
    seen = set()
    # open() in a with-block: file() does not exist on Python 3 and the
    # original never closed the handle.
    with open(input.file_name) as handle:
        for i, line in enumerate(handle):
            if line and not line.startswith('#'):
                elems = line.rstrip('\r\n').split('\t')
                for col in range(1, ncols + 1):
                    # A short row simply has no value for the missing columns.
                    if col <= len(elems):
                        cell = elems[col - 1]
                    else:
                        cell = ''
                    try:
                        float(cell)
                        is_float = True
                    except ValueError:
                        is_float = False
                        # NOTE(review): header naming is assumed to apply only
                        # to non-numeric cells of the first line - confirm.
                        if cell and i == 0:  # header row
                            colnames[col] = cell
                    if is_float or (not strict) or (col == 1):  # all in if not GG
                        option = colnames[col]
                        if option not in seen:  # new
                            seen.add(option)
                            columns.append((option, str(col), False))
                if columns and i > 10:
                    # We have our select list built, so we can stop scanning.
                    break
            if i == 30:
                break  # Hopefully we never get here...
    # Preselect the first five options (or all of them if there are fewer).
    for idx in range(min(5, len(columns))):
        name, value, _ = columns[idx]
        columns[idx] = (name, value, True)
    return columns
|
|
79
|
|
def get_marchini_dir():
    """Return the filesystem directory for snptest style files.

    This is the value of the module-level ``marchinirepos`` setting.
    """
    return marchinirepos
|
|
83
|
|
84
|
|
def get_lib_SNPTESTCaCofiles():
    """Return select options for the file sets available for caco studies.

    File sets share a common name with both _1 and _2 suffixes; only the
    '.gen_1' member is globbed (in the directory from get_marchini_dir()) and
    the suffix is removed to give the file set name.

    Returns a list of (name, name, selected) tuples, sorted by path, with the
    first one selected. If no file matches, a single placeholder option with
    value 'None' is returned.
    """
    d = get_marchini_dir()
    testsuffix = '.gen_1'  # glob these
    # glob order is arbitrary; sort so the preselected first entry is stable.
    matches = sorted(glob.glob('%s/*%s' % (d, testsuffix)))
    if not matches:
        return [('No Marchini CaCo files found in %s - convert some using the Marchini converter tool' % d, 'None', True)]
    # Slice the suffix off the end: split(testsuffix)[0] would cut the path at
    # an earlier occurrence of '.gen_1'.
    names = [path[:-len(testsuffix)] for path in matches]
    return [(name, name, idx == 0) for idx, name in enumerate(names)]
|
|
98
|
|
def getChropt():
    """Return dynamic chromosome select options.

    Returns a list of (name, name, selected) tuples for 'X', 'Y' and then
    '1' through '22', in that order. Exactly one entry is selected: the one
    at index 3, which is chromosome '2'.
    """
    chroms = ['X', 'Y'] + ['%d' % n for n in range(1, 23)]
    selected_index = 3  # position of chromosome '2' in the list above
    return [(c, c, idx == selected_index) for idx, c in enumerate(chroms)]
|
|
108
|
|
109
|
|
def get_phecols(fname=''):
    """Return a list of phenotype columns for a multi-select list.

    The first line of ``fname`` is split on whitespace and every field becomes
    an unselected (name, name, False) option.

    If the file cannot be opened or read, or is empty, a single placeholder
    option with value 'None' describing the failure is returned instead of
    raising.
    """
    try:
        # open()/next() replace the Python-2-only file()/.next(), and the
        # with-block closes the handle that used to be leaked.
        with open(fname, 'r') as handle:
            header = next(handle).split()
    except (IOError, OSError, TypeError, ValueError, StopIteration):
        # Best effort by design: an unusable path yields a placeholder option.
        return [('get_phecols unable to open file %s' % fname, 'None', False)]
    return [(name, name, False) for name in header]
|
|
123
|
|
124 #Return various kinds of files
|
|
125
|
|
def get_lib_pedfiles():
    """Return select options for the .ped files in both repositories.

    Globs ``<librepos>/ped/*.ped`` and ``<myrepos>/ped/*.ped``, sorts the
    paths and strips the '.ped' extension. Returns a list of
    (name, name, True) tuples; if nothing matches, a single placeholder
    option with value 'None' is returned.
    """
    suffix = '.ped'
    paths = glob.glob('%s/ped/*%s' % (librepos, suffix))
    paths += glob.glob('%s/ped/*%s' % (myrepos, suffix))
    if not paths:
        return [('No ped files - add some to %s/ped or %s/ped' % (librepos, myrepos), 'None', True)]
    paths.sort()
    # Slice the extension off the end: split('.ped')[0] would cut the path at
    # an earlier '.ped' (e.g. inside a directory name).
    names = [p[:-len(suffix)] for p in paths]
    # The flag used to be the string 'True'; a real boolean keeps the same
    # truthiness while matching the other option providers.
    return [(name, name, True) for name in names]
|
|
136
|
|
def get_lib_phefiles():
    """Return select options for the .phe files in both repositories.

    Globs ``<librepos>/pheno/*.phe`` and ``<myrepos>/pheno/*.phe`` and returns
    a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    ext = 'phe'
    paths = glob.glob('%s/pheno/*.%s' % (librepos, ext))
    paths += glob.glob('%s/pheno/*.%s' % (myrepos, ext))
    if not paths:
        return [('No %s files - add some to %s/pheno or %s/pheno' % (ext, librepos, myrepos), 'None', True)]
    paths.sort()
    # The flag used to be the string 'False', which is truthy and so marked
    # every entry as selected; use the boolean that was evidently meant.
    return [(p, p, False) for p in paths]
|
|
147
|
|
def get_lib_bedfiles():
    """Return select options for the .bed files in both repositories.

    Globs ``<librepos>/plinkbed/*.bed`` and ``<myrepos>/plinkbed/*.bed``,
    sorts the paths and strips the '.bed' extension. Returns a list of
    unselected (name, name, False) tuples; if nothing matches, a single
    placeholder option with value 'None' is returned.
    """
    suffix = '.bed'
    paths = glob.glob('%s/plinkbed/*%s' % (librepos, suffix))
    paths += glob.glob('%s/plinkbed/*%s' % (myrepos, suffix))
    if not paths:
        return [('No bed files - Please import some to %s/plinkbed or %s/plinkbed' % (librepos, myrepos), 'None', True)]
    paths.sort()
    # Slice the extension off the end: split('.bed')[0] would cut the path at
    # an earlier '.bed' (e.g. inside a directory name).
    return [(p[:-len(suffix)], p[:-len(suffix)], False) for p in paths]
|
|
158
|
|
def get_lib_fbatfiles():
    """Return select options for the fbat .ped files in both repositories.

    Globs ``<librepos>/plinkfbat/*.ped`` and ``<myrepos>/plinkfbat/*.ped`` and
    returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    paths = glob.glob('%s/plinkfbat/*.ped' % librepos)
    paths += glob.glob('%s/plinkfbat/*.ped' % myrepos)
    if not paths:
        # The placeholder used to say "bed" although *.ped is what is searched.
        return [('No fbat ped files - Please import some to %s/plinkfbat or %s/plinkfbat' % (librepos, myrepos), 'None', True)]
    paths.sort()
    return [(p, p, False) for p in paths]
|
|
168
|
|
def get_lib_mapfiles():
    """Return select options for the .map files in both repositories.

    Globs ``<librepos>/ped/*.map`` and ``<myrepos>/ped/*.map`` and returns a
    sorted list of unselected (path, path, False) tuples; if nothing matches,
    a single placeholder option with value 'None' is returned.
    """
    paths = glob.glob('%s/ped/*.map' % librepos)
    paths += glob.glob('%s/ped/*.map' % myrepos)
    if not paths:
        # Name both searched directories; only librepos used to be mentioned.
        return [('No map files - add some to %s/ped or %s/ped' % (librepos, myrepos), 'None', True)]
    paths.sort()
    return [(p, p, False) for p in paths]
|
|
178
|
|
def get_my_pedfiles():
    """Return select options for the .ped files directly under ``myrepos``.

    Returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.ped' % myrepos))
    if not paths:
        # Point at the directory actually searched (was librepos).
        return [('No ped files - add some to %s' % myrepos, 'None', True)]
    return [(p, p, False) for p in paths]
|
|
186
|
|
def get_my_mapfiles():
    """Return select options for the .map files directly under ``myrepos``.

    Returns a sorted list of (path, path, True) tuples; if nothing matches, a
    single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.map' % myrepos))
    if not paths:
        # The placeholder used to say "ped" and to point at librepos.
        return [('No map files - add some to %s' % myrepos, 'None', True)]
    # The flag used to be the string 'True'; a real boolean keeps the same
    # truthiness while matching the other option providers.
    return [(p, p, True) for p in paths]
|
|
194
|
|
def get_lib_xlsfiles():
    """Return select options for the .xls files directly under ``librepos``.

    Returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.xls' % librepos))
    if not paths:
        # The placeholder used to say "ped" although *.xls is searched.
        return [('No xls files - add some to %s' % librepos, 'None', True)]
    return [(p, p, False) for p in paths]
|
|
202
|
|
def get_lib_htmlfiles():
    """Return select options for the .html files directly under ``librepos``.

    Returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.html' % librepos))
    if not paths:
        # The placeholder used to say "ped" although *.html is searched.
        return [('No html files - add some to %s' % librepos, 'None', True)]
    return [(p, p, False) for p in paths]
|
|
210
|
|
def get_my_xlsfiles():
    """Return select options for the .xls files directly under ``myrepos``.

    Returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.xls' % myrepos))
    if not paths:
        # The placeholder used to say "ped" and to point at librepos.
        return [('No xls files - add some to %s' % myrepos, 'None', True)]
    return [(p, p, False) for p in paths]
|
|
218
|
|
def get_my_htmlfiles():
    """Return select options for the .html files directly under ``myrepos``.

    Returns a sorted list of unselected (path, path, False) tuples; if nothing
    matches, a single placeholder option with value 'None' is returned.
    """
    # glob order is arbitrary; sort for a stable option list.
    paths = sorted(glob.glob('%s/*.html' % myrepos))
    if not paths:
        # The placeholder used to say "ped" and to point at librepos.
        return [('No html files - add some to %s' % myrepos, 'None', True)]
    return [(p, p, False) for p in paths]
|
|
226
|
|
227
|