Mercurial > repos > xuebing > sharplabtool
comparison tools/rgenetics/rgClean.py @ 0:9071e359b9a3
Uploaded
| author | xuebing |
|---|---|
| date | Fri, 09 Mar 2012 19:37:19 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:9071e359b9a3 |
|---|---|
| 1 """ | |
| 2 # galaxy tool xml files can define a galaxy supplied output filename | |
| 3 # that must be passed to the tool and used to return output | |
| 4 # here, the plink log file is copied to that file and removed | |
| 5 # took a while to figure this out! | |
| 6 # use exec_before_job to give files sensible names | |
| 7 # | |
| 8 # ross april 14 2007 | |
| 9 # plink cleanup script | |
| 10 # ross lazarus March 2007 for camp illumina whole genome data | |
| 11 # note problems with multiple commands being ignored - eg --freq --missing --mendel | |
| 12 # only the first seems to get done... | |
| 13 # | |
| 14 ##Summary statistics versus inclusion criteria | |
| 15 ## | |
| 16 ##Feature As summary statistic As inclusion criteria | |
| 17 ##Missingness per individual --missing --mind N | |
| 18 ##Missingness per marker --missing --geno N | |
| 19 ##Allele frequency --freq --maf N | |
| 20 ##Hardy-Weinberg equilibrium --hardy --hwe N | |
| 21 ##Mendel error rates --mendel --me N M | |
| 22 # | |
| 23 # call as plinkClean.py $i $o $mind $geno $hwe $maf $mef $mei $outfile | |
| 24 # note plinkClean_code.py does some renaming before the job starts | |
| 25 | |
| 26 | |
| 27 <command interpreter="python2.4"> | |
| 28 rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind' '$geno' '$hwe' '$maf' | |
| 29 '$mef' '$mei' '$out_file1' '$out_file1.files_path' '$userId' | |
| 30 | |
| 31 | |
| 32 """ | |
| 33 import sys,shutil,os,subprocess, glob, string, tempfile, time | |
| 34 from rgutils import galhtmlprefix, timenow, plinke | |
| 35 prog = os.path.split(sys.argv[0])[-1] | |
| 36 myversion = 'January 4 2010' | |
| 37 verbose=False | |
| 38 | |
| 39 | |
| 40 def fixoutaff(outpath='',newaff='1'): | |
| 41 """ quick way to create test data sets - set all aff to 1 or 2 for | |
| 42 some hapmap data and then merge | |
| 43 [rerla@beast galaxy]$ head tool-data/rg/library/pbed/affyHM_CEU.fam | |
| 44 1341 14 0 0 2 1 | |
| 45 1341 2 13 14 2 1 | |
| 46 1341 13 0 0 1 1 | |
| 47 1340 9 0 0 1 1 | |
| 48 1340 10 0 0 2 1 | |
| 49 """ | |
| 50 nchanged = 0 | |
| 51 fam = '%s.fam' % outpath | |
| 52 famf = open(fam,'r') | |
| 53 fl = famf.readlines() | |
| 54 famf.close() | |
| 55 for i,row in enumerate(fl): | |
| 56 lrow = row.split() | |
| 57 if lrow[-1] <> newaff: | |
| 58 lrow[-1] = newaff | |
| 59 fl[i] = ' '.join(lrow) | |
| 60 fl[i] += '\n' | |
| 61 nchanged += 1 | |
| 62 fo = open(fam,'w') | |
| 63 fo.write(''.join(fl)) | |
| 64 fo.close() | |
| 65 return nchanged | |
| 66 | |
| 67 | |
| 68 | |
| 69 def clean(): | |
| 70 """ | |
| 71 """ | |
| 72 if len(sys.argv) < 16: | |
| 73 print >> sys.stdout, '## %s expected 12 params in sys.argv, got %d - %s' % (prog,len(sys.argv),sys.argv) | |
| 74 print >> sys.stdout, """this script will filter a linkage format ped | |
| 75 and map file containing genotypes. It takes 14 parameters - the plink --f parameter and" | |
| 76 a new filename root for the output clean data followed by the mind,geno,hwe,maf, mef and mei" | |
| 77 documented in the plink docs plus the file to be returned to Galaxy | |
| 78 called as: | |
| 79 <command interpreter="python"> | |
| 80 rgClean.py '$input_file.extra_files_path' '$input_file.metadata.base_name' '$title' '$mind' | |
| 81 '$geno' '$hwe' '$maf' '$mef' '$mei' '$out_file1' '$out_file1.files_path' | |
| 82 '$relfilter' '$afffilter' '$sexfilter' '$fixaff' | |
| 83 </command> | |
| 84 | |
| 85 """ | |
| 86 sys.exit(1) | |
| 87 plog = [] | |
| 88 inpath = sys.argv[1] | |
| 89 inbase = sys.argv[2] | |
| 90 killme = string.punctuation + string.whitespace | |
| 91 trantab = string.maketrans(killme,'_'*len(killme)) | |
| 92 title = sys.argv[3].translate(trantab) | |
| 93 mind = sys.argv[4] | |
| 94 geno = sys.argv[5] | |
| 95 hwe = sys.argv[6] | |
| 96 maf = sys.argv[7] | |
| 97 me1 = sys.argv[8] | |
| 98 me2 = sys.argv[9] | |
| 99 outfname = sys.argv[10] | |
| 100 outfpath = sys.argv[11] | |
| 101 relf = sys.argv[12] | |
| 102 afff = sys.argv[13] | |
| 103 sexf = sys.argv[14] | |
| 104 fixaff = sys.argv[15] | |
| 105 output = os.path.join(outfpath,outfname) | |
| 106 outpath = os.path.join(outfpath,title) | |
| 107 outprunepath = os.path.join(outfpath,'ldprune_%s' % title) | |
| 108 try: | |
| 109 os.makedirs(outfpath) | |
| 110 except: | |
| 111 pass | |
| 112 bfile = os.path.join(inpath,inbase) | |
| 113 outf = file(outfname,'w') | |
| 114 vcl = [plinke,'--noweb','--bfile',bfile,'--make-bed','--out', | |
| 115 outpath,'--set-hh-missing','--mind',mind, | |
| 116 '--geno',geno,'--maf',maf,'--hwe',hwe,'--me',me1,me2] | |
| 117 # yes - the --me parameter takes 2 values - mendels per snp and per family | |
| 118 if relf == 'oo': # plink filters are what they leave... | |
| 119 vcl.append('--filter-nonfounders') # leave only offspring | |
| 120 elif relf == 'fo': | |
| 121 vcl.append('--filter-founders') | |
| 122 if afff == 'affonly': | |
| 123 vcl.append('--filter-controls') | |
| 124 elif relf == 'unaffonly': | |
| 125 vcl.append('--filter-cases') | |
| 126 if sexf == 'fsex': | |
| 127 vcl.append('--filter-females') | |
| 128 elif relf == 'msex': | |
| 129 vcl.append('--filter-males') | |
| 130 p=subprocess.Popen(' '.join(vcl),shell=True,cwd=outfpath) | |
| 131 retval = p.wait() | |
| 132 plog.append('%s started, called as %s' % (prog,' '.join(sys.argv))) | |
| 133 outf.write(galhtmlprefix % prog) | |
| 134 outf.write('<ul>\n') | |
| 135 plogf = '%s.log' % os.path.join(outfpath,title) | |
| 136 try: | |
| 137 plogl = file(plogf,'r').readlines() | |
| 138 plog += [x.strip() for x in plogl] | |
| 139 except: | |
| 140 plog += ['###Cannot open plink log file %s' % plogf,] | |
| 141 # if fixaff, want to 'fix' the fam file | |
| 142 if fixaff <> '0': | |
| 143 nchanged = fixoutaff(outpath=outpath,newaff=fixaff) | |
| 144 plog += ['## fixaff was requested %d subjects affection status changed to %s' % (nchanged,fixaff)] | |
| 145 pf = file(plogf,'w') | |
| 146 pf.write('\n'.join(plog)) | |
| 147 pf.close() | |
| 148 globme = os.path.join(outfpath,'*') | |
| 149 flist = glob.glob(globme) | |
| 150 flist.sort() | |
| 151 for i, data in enumerate( flist ): | |
| 152 outf.write('<li><a href="%s">%s</a></li>\n' % (os.path.split(data)[-1],os.path.split(data)[-1])) | |
| 153 outf.write('</ul>\n') | |
| 154 outf.write("</ul></br></div></body></html>") | |
| 155 outf.close() | |
| 156 | |
| 157 | |
| 158 if __name__ == "__main__": | |
| 159 clean() | |
| 160 |
