Mercurial > repos > xuebing > sharplabtool
diff tools/rgenetics/listFiles.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/rgenetics/listFiles.py Fri Mar 09 19:37:19 2012 -0500 @@ -0,0 +1,227 @@ +#Provides Upload tool with access to list of available files +import glob,sys +import galaxy.app as thisapp +import galaxy.util + +from elementtree.ElementTree import XML + +librepos = '/usr/local/galaxy/data/rg' +myrepos = '/home/rerla/galaxy' +marchinirepos = '/usr/local/galaxy/data/rg/snptest' + +from galaxy.tools.parameters import DataToolParameter + +#Provides Upload tool with access to list of available builds + +builds = [] +#Read build names and keys from galaxy.util +for dbkey, build_name in galaxy.util.dbnames: + builds.append((build_name,dbkey,False)) + +#Return available builds +def get_available_builds(defval='hg18'): + for i,x in enumerate(builds): + if x[1] == defval: + x = list(x) + x[2] = True + builds[i] = tuple(x) + return builds + + + +def get_tabular_cols( input, outformat='gg' ): + """numeric only other than rs for strict genome graphs + otherwise tabular. Derived from galaxy tool source around August 2007 by Ross""" + columns = [] + seenCnames = {} + elems = [] + colnames = ['Col%d' % x for x in range(input.metadata.columns+1)] + strict = (outformat=='gg') + for i, line in enumerate( file ( input.file_name ) ): + if line and not line.startswith( '#' ): + line = line.rstrip('\r\n') + elems = line.split( '\t' ) + + """ + Strict gg note: + Since this tool requires users to select only those columns + that contain numerical values, we'll restrict the column select + list appropriately other than the first column which must be a marker + """ + if len(elems) > 0: + for col in range(1, input.metadata.columns+1): + isFloat = False # short circuit common result + try: + val = float(elems[col-1]) + isFloat = True + except: + val = elems[col-1] + if val: + if i == 0: # header row + colnames[col] = val + if isFloat or (not strict) or (col == 1): # all in if not GG + option = colnames[col] + if not seenCnames.get(option,None): # new + columns.append((option,str(col),False)) + seenCnames[option] = option + #print 'get_tab: %d=%s. Columns=%s' % (i,line,str(columns)) + if len(columns) > 0 and i > 10: + """ + We have our select list built, so we can break out of the outer most for loop + """ + break + if i == 30: + break # Hopefully we never get here... + for option in range(min(5,len(columns))): + (x,y,z) = columns[option] + columns[option] = (x,y,True) + return columns # sorted select options + +def get_marchini_dir(): + """return the filesystem directory for snptest style files""" + return marchinirepos + + +def get_lib_SNPTESTCaCofiles(): + """return a list of file names - without extensions - available for caco studies + These have a common file name with both _1 and _2 suffixes""" + d = get_marchini_dir() + testsuffix = '.gen_1' # glob these + flist = glob.glob('%s/*%s' % (d,testsuffix)) + flist = [x.split(testsuffix)[0] for x in flist] # leaves with a list of file set names + if len(flist) > 0: + dat = [(flist[0],flist[0],True),] + dat += [(x,x,False) for x in flist[1:]] + else: + dat = [('No Marchini CaCo files found in %s - convert some using the Marchini converter tool' % d,'None',True),] + return dat + +def getChropt(): + """return dynamic chromosome select options + """ + c = ['X','Y'] + c += ['%d' % x for x in range(1,23)] + dat = [(x,x,False) for x in c] + x,y,z = dat[3] + dat[3] = (x,y,True) + return dat + + +def get_phecols(fname=''): + """ return a list of phenotype columns for a multi-select list + prototype: + foo = ('fake - not yet implemented','not implemented','False') + dat = [foo for x in range(5)] + return dat + """ + try: + header = file(fname,'r').next().split() + except: + return [('get_phecols unable to open file %s' % fname,'None',False),] + dat = [(x,x,False) for x in header] + return dat + +#Return various kinds of files + +def get_lib_pedfiles(): + dat = glob.glob('%s/ped/*.ped' % librepos) + dat += glob.glob('%s/ped/*.ped' % myrepos) + dat.sort() + if len(dat) > 0: + dat = [x.split('.ped')[0] for x in dat] + dat = [(x,x,'True') for x in dat] + else: + dat = [('No ped files - add some to %s/ped or %s/ped' % (librepos,myrepos),'None',True),] + return dat + +def get_lib_phefiles(): + ext = 'phe' + dat = glob.glob('%s/pheno/*.%s' % (librepos,ext)) + dat += glob.glob('%s/pheno/*.%s' % (myrepos,ext)) + dat.sort() + if len(dat) > 0: + dat = [(x,x,'False') for x in dat] + else: + dat = [('No %s files - add some to %s/pheno or %s/pheno' % (ext,librepos,myrepos),'None',True),] + return dat + +def get_lib_bedfiles(): + dat = glob.glob('%s/plinkbed/*.bed' % librepos) + dat += glob.glob('%s/plinkbed/*.bed' % myrepos) + dat.sort() + if len(dat) > 0: + dat = [x.split('.bed')[0] for x in dat] + dat = [(x,x,False) for x in dat] + else: + dat = [('No bed files - Please import some to %s/plinkbed or %s/plinkbed' % (librepos,myrepos),'None',True),] + return dat + +def get_lib_fbatfiles(): + dat = glob.glob('%s/plinkfbat/*.ped' % librepos) + dat += glob.glob('%s/plinkfbat/*.ped' % myrepos) + dat.sort() + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No fbat bed files - Please import some to %s/plinkfbat or %s/plinkfbat' % (librepos,myrepos),'None',True),] + return dat + +def get_lib_mapfiles(): + dat = glob.glob('%s/ped/*.map' % librepos) + dat += glob.glob('%s/ped/*.map' % myrepos) + dat.sort() + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No map files - add some to %s/ped' % librepos,'None',True),] + return dat + +def get_my_pedfiles(): + dat = glob.glob('%s/*.ped' % myrepos) + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +def get_my_mapfiles(): + dat = glob.glob('%s/*.map' % myrepos) + if len(dat) > 0: + dat = [(x,x,'True') for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +def get_lib_xlsfiles(): + dat = glob.glob('%s/*.xls' % librepos) + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +def get_lib_htmlfiles(): + dat = glob.glob('%s/*.html' % librepos) + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +def get_my_xlsfiles(): + dat = glob.glob('%s/*.xls' % myrepos) + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +def get_my_htmlfiles(): + dat = glob.glob('%s/*.html' % myrepos) + if len(dat) > 0: + dat = [(x,x,False) for x in dat] + else: + dat = [('No ped files - add some to %s' % librepos,'None',True),] + return dat + +