Mercurial > repos > sjung > bdds
comparison piq_wrapper.py @ 5:f77a3f2876c3 draft
Uploaded
| author | sjung |
|---|---|
| date | Wed, 24 May 2017 00:36:16 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 4:20bbc86eae21 | 5:f77a3f2876c3 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 | |
| 3 import optparse, os, re, shutil, sys, tempfile, glob, shlex, vcf, pysam, tarfile, csv, operator | |
| 4 from subprocess import * | |
| 5 import subprocess | |
| 6 import multiprocessing | |
| 7 from joblib import Parallel, delayed | |
| 8 from itertools import izip | |
| 9 | |
# Worker count for the per-motif joblib fan-out below.
NCPU = multiprocessing.cpu_count()
# Chunk size used when echoing captured subprocess stderr.
CHUNK_SIZE = 2**20 #1mb
| 12 | |
def create_dict(files):
    """Group result files by sample prefix.

    The prefix is the first two '-'-separated fields of the basename of
    each non-"RC" (reverse-complement) file; every path that contains
    that prefix as a substring -- including the RC counterpart -- is
    collected under it.

    files   -- iterable of file paths
    returns -- dict mapping prefix -> list of matching paths, in input order
    """
    basenames = [os.path.basename(f) for f in files]
    # Use a set of prefixes: if two non-RC files shared a prefix, the old
    # list-based scan visited that prefix twice and appended every
    # matching file twice, breaking the len(pair) == 2 check downstream.
    prefixes = set('-'.join(b.split('-')[0:2]) for b in basenames if 'RC' not in b)
    grouped = {}
    for prefix in prefixes:
        for path in files:
            if prefix in path:
                grouped.setdefault(prefix, []).append(path)
    return grouped
| 26 | |
def file_merge(file1, file2, out_dir):
    """Concatenate two tab-delimited PIQ result files into one.

    Writes '<out_dir>/<basename-of-file1>_merged<ext>'.  The first row
    (header) of each input is skipped; remaining rows are re-emitted
    comma-delimited, unquoted, with '\\' as the escape character.

    file1, file2 -- paths of the pair to merge (forward and RC calls)
    out_dir      -- directory receiving the merged file
    """
    sources = (file1, file2)
    base, ext = os.path.splitext(os.path.basename(file1))
    # Single pass over both inputs with one writer.  The previous version
    # had a duplicated outer loop/open pair (same shadowed index) that
    # re-created the writer and appended the inputs more than once.
    with open("%s/%s_merged%s" % (out_dir, base, ext), "wb") as fw:
        writer = csv.writer(fw, delimiter=',', quotechar='',
                            quoting=csv.QUOTE_NONE, escapechar='\\')
        for src in sources:
            with open(src, "rb") as f:
                reader = csv.reader(f, delimiter='\t')
                next(reader)  # skip the header row of each input
                for row in reader:
                    writer.writerow(row)
| 42 | |
def cleanup_before_exit(tmp_dir):
    """Remove the scratch directory tree, tolerating an unset or missing path."""
    if not tmp_dir:
        return
    if os.path.exists(tmp_dir):
        shutil.rmtree(tmp_dir)
| 46 | |
def parallel_jobs(i, piq_dir_path, tmp_dir, inputF, out_dir, output_dir):
    """Run the three PIQ R steps for motif number *i* as one shell command.

    Steps: pwmmatch.exact.r (motif match), bam2rdata.r (BAM -> RData),
    pertf.r (calls).  Subprocess stdout/stderr are captured in temp files
    under tmp_dir; captured stderr is echoed to sys.stderr when the shell
    returns non-zero, otherwise to sys.stdout.
    """
    motif = i
    steps = [
        "Rscript %s/pwmmatch.exact.r %s/common.r %s/jasparVertebrateCore2016-519-matrices.txt %d %s/; "
        % (piq_dir_path, piq_dir_path, piq_dir_path, motif, tmp_dir),
        "Rscript %s/bam2rdata.r %s/common.r %s/bam.RData %s; "
        % (piq_dir_path, piq_dir_path, tmp_dir, inputF),
        "Rscript %s/pertf.r %s/common.r %s/ %s %s/ %s/bam.RData %d; "
        % (piq_dir_path, piq_dir_path, tmp_dir, tmp_dir, out_dir, tmp_dir, motif),
    ]
    cmd = "".join(steps)
    stdout = tempfile.NamedTemporaryFile(prefix="piq-stdout-", dir=tmp_dir)
    stderr = tempfile.NamedTemporaryFile(prefix="piq-stderr-", dir=tmp_dir)
    # shell=True is required: cmd is three ';'-chained Rscript invocations.
    proc = subprocess.Popen(args=cmd, stdout=stdout, stderr=stderr,
                            shell=True, cwd=output_dir)
    return_code = proc.wait()

    # Replay the captured stderr in CHUNK_SIZE pieces.
    stderr_target = sys.stderr if return_code else sys.stdout
    stderr.flush()
    stderr.seek(0)
    while True:
        chunk = stderr.read(CHUNK_SIZE)
        if not chunk:
            break
        stderr_target.write(chunk)
    stderr.close()
    stdout.close()
| 77 | |
def _merge_sample_files(files, out_dir):
    """Merge each sample's forward/RC file pair produced by PIQ.

    Pairs are grouped with create_dict(); a two-file group is merged via
    file_merge(), a singleton is simply copied to '<base>_merged<ext>'.
    Previously this loop was written out twice (once for csv, once for bed).
    """
    for paths in create_dict(files).values():
        if len(paths) == 2:
            file_merge(paths[0], paths[1], out_dir)
        else:
            base, ext = os.path.splitext(os.path.basename(paths[0]))
            shutil.copy(paths[0], "%s/%s_merged%s" % (out_dir, base, ext))


def __main__():
    """Galaxy wrapper entry point: run PIQ over all JASPAR motifs.

    Fans out one PIQ run per motif across all CPUs, merges the per-sample
    forward/RC call files, joins bed and csv rows per sample, then
    concatenates all per-sample final beds into one combined output and
    removes the working directory.
    """
    piq_dir_path = "/mnt/galaxyTools/tools/piq/1.3/thashim"
    parser = optparse.OptionParser()
    # optparse silently drops empty short-option strings, so the old
    # leading '' arguments were no-ops and are omitted here.
    parser.add_option('--input', dest="inputF", action='store', type="string",
                      default=None, help='')
    parser.add_option('--output', dest='outputF', action='store', type="string",
                      default=None, help='')
    parser.add_option('--out-dir', dest='output_dir', action='store', type="string",
                      default=None,
                      help='If specified, the output directory for extra files.')

    (options, args) = parser.parse_args()

    if not os.path.exists(options.output_dir):
        os.mkdir(options.output_dir)
    out_dir = "%s/out" % options.output_dir
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)
    tmp_dir = "%s/tmp" % options.output_dir
    if not os.path.exists(tmp_dir):
        os.mkdir(tmp_dir)

    # One PIQ pipeline run per JASPAR vertebrate-core motif (1..519),
    # fanned out across every core.
    maxMotif = 519
    Parallel(n_jobs=NCPU)(
        delayed(parallel_jobs)(i, piq_dir_path, tmp_dir, options.inputF,
                               out_dir, options.output_dir)
        for i in range(1, maxMotif + 1))

    # Combine each sample's forward/RC pair, for csv and bed outputs alike.
    _merge_sample_files(sorted(glob.glob("%s/*calls.all.csv" % out_dir)), out_dir)
    _merge_sample_files(sorted(glob.glob("%s/*calls.all.bed" % out_dir)), out_dir)

    m_csvFiles = sorted(glob.glob("%s/*_merged.csv" % out_dir))
    m_bedFiles = sorted(glob.glob("%s/*_merged.bed" % out_dir))

    # NOTE(review): csv/bed pairing relies on both sorted globs having the
    # same length and order -- holds when PIQ emits exactly one csv and one
    # bed per sample; verify if the upstream outputs ever change.
    for i in range(len(m_csvFiles)):
        outF = os.path.splitext(os.path.basename(m_csvFiles[i]))[0]
        # Paste bed and csv rows side by side, line by line.
        with open('%s/%s_all_tmp' % (out_dir, outF), 'wb') as res, \
                open(m_bedFiles[i]) as f1, open(m_csvFiles[i]) as f2:
            for line1, line2 in zip(f1, f2):
                res.write("{},{}\n".format(line1.rstrip(), line2.rstrip()))
        with open('%s/%s_all_tmp' % (out_dir, outF), "rb") as f3:
            rdr = csv.reader(f3)
            with open('%s/%s_final.bed' % (out_dir, outF), 'wb') as result:
                wtr = csv.writer(result, delimiter='\t', quotechar='',
                                 quoting=csv.QUOTE_NONE)
                # Keep the selected bed/csv columns, stripping the escape
                # backslashes introduced by the merge step's writer.
                for r in rdr:
                    if len(r) > 12:
                        wtr.writerow((r[0], r[1], r[2], r[3], r[5],
                                      r[9].replace('\\', ""),
                                      r[10].replace('\\', ""),
                                      r[11].replace('\\', ""),
                                      r[12]))

    # Concatenate every per-sample final bed into one combined track and
    # copy the individual files alongside it.
    filenames = sorted(glob.glob("%s/*_final.bed" % out_dir))
    with open('%s/all_motif_comb_final.bed' % options.output_dir, 'w') as outfile:
        for fname in filenames:
            shutil.copy(fname, options.output_dir)
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)

    shutil.copy('%s/all_motif_comb_final.bed' % options.output_dir, options.outputF)

    # Everything needed has been copied out; drop the working directory.
    cleanup_before_exit(options.output_dir)

if __name__ == "__main__":
    __main__()
| 155 |
