annotate Contra/baseline.py @ 4:9a76d500b049

Uploaded
author fcaramia
date Thu, 13 Sep 2012 02:44:08 -0400
parents 7564f3b1e675
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 #!/usr/bin/python
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 import os
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 import sys
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 import fnmatch
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 import shlex
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 import subprocess
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 import shutil
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 import math
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 from optparse import OptionParser
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 from scripts.split_chromosome import splitByChromosome3 as splitByChromosome
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 from scripts.get_chr_length import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 from scripts.convert_targeted_regions import *
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 from scripts.convert_gene_coordinate import convertGeneCoordinate2 as convertGeneCoordinate
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 from multiprocessing import Pool
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 class Params:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 """
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 Class for top-level system parameters:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 """
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 def __init__(self):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
24 def mult_files(option, opt_str, value, parser):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
25 args=[]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
26 for arg in parser.rargs:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
27 if arg[0] != "-":
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
28 args.append(arg)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
29 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
30 del parser.rargs[:len(args)]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
31 break
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
32 if getattr(parser.values, option.dest):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
33 args.extend(getattr(parser.values, option.dest))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
34 setattr(parser.values, option.dest, args)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
35
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
36
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
37 #command-line option definition
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
38 self.parser = OptionParser()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
39 self.parser.add_option("-t", "--target",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
40 help="Target region definition file [REQUIRED] [BED Format]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
41 action="store", type="string", dest="target")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
42 self.parser.add_option("-f", "--files",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
43 help="Files to be converted to baselines [REQUIRED] [BAM]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
44 action="callback", callback=mult_files, dest="files")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
45 self.parser.add_option("-o", "--output",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
46 help="Output folder [REQUIRED]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
47 action="store", type="string", dest="output")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
48 self.parser.add_option("-c","--trim",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
49 help="Portion of outliers to be removed before calculating average [Default: 0.2]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
50 action="store", dest="trim", default="0.2")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
51 self.parser.add_option("-n","--name",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
52 help="Output baseline name [Default: baseline]",
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
53 action="store", dest="name", default="baseline")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
54
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
55 # required parameters list:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
56 self.ERRORLIST = []
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
57
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
58 #change system parameters based on any command line arguments
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
59 (options, args) = self.parser.parse_args()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
60 if options.target:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
61 self.TARGET=options.target
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
62 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
63 self.ERRORLIST.append("target")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
64
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
65 if options.files:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
66 self.FILES=options.files
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
67 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
68 self.ERRORLIST.append("files")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
69
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
70 if options.output:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
71 self.OUTPUT=options.output
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
72 else:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
73 self.ERRORLIST.append("output")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
74
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
75 if options.trim:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
76 self.TRIM=float(options.trim)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
77
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
78 if options.name:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
79 self.NAME=options.name
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
80
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
81 if len(self.ERRORLIST) != 0:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
82 self.parser.print_help()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
83 self.parser.error("Missing required parameters")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
84
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
85
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
86 def repeat(self):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
87 #params test
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
88 print "target :", self.TARGET
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
89 print "files :", self.FILES
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
90 print "output :", self.OUTPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
91 print "trim :", self.TRIM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
92 print "name :", self.NAME
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
93
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
94
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
95 # option handling
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
96 params = Params()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
97 #params.repeat()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
98 targetFile = params.TARGET
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
99 infiles = params.FILES
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
100 output_dir = params.OUTPUT
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
101
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
102 #Debug
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
103 print " ------ baseline.py ------- "
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
104 print "Target:", targetFile
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
105 for files in infiles:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
106 print "File:", files
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
107 print "Output Directory: ", output_dir
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
108
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
109 def make_new_directory(outdir):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
110 print outdir
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
111 if not os.path.exists(outdir):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
112 os.mkdir(outdir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
113
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
114 print " ----- creating output directory -----"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
115 make_new_directory(output_dir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
116 outdir = os.path.join(output_dir, "buf")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
117 make_new_directory(outdir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
118
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
119 targetFile2 = os.path.join(outdir, os.path.basename(targetFile)+".sorted")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
120 os.system("sort -k1,1 -k2n %s > %s" %(targetFile,targetFile2))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
121
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
122 def processInFile(infile):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
123 infilename = os.path.basename(infile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
124 s_outdir = os.path.join(outdir, infilename)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
125 make_new_directory(s_outdir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
126 genomeFile = os.path.join(s_outdir,infilename +'.chrsummary')
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
127 get_genome(infile, genomeFile)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
128
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
129 bedgraph = os.path.join(s_outdir, infilename + ".BEDGRAPH")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
130 args = shlex.split("genomeCoverageBed -ibam %s -bga -g %s" %(infile, genomeFile))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
131 iOutFile = open(bedgraph, "w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
132 output = subprocess.Popen(args, stdout = iOutFile).wait()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
133 iOutFile.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
134
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
135 targetList = convertTarget(targetFile2)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
136
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
137 splitByChromosome(bedgraph)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
138
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
139 convertGeneCoordinate(targetList, os.path.dirname(bedgraph)+"/")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
140 bedgraph_tgtonly = bedgraph+".TARGETONLY"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
141 bedgraph_tgtonly_avg = bedgraph+".TARGETONLY.AVERAGE"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
142 os.rename(os.path.join(os.path.dirname(bedgraph),"geneRefCoverage.txt"),bedgraph_tgtonly)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
143 os.rename(os.path.join(os.path.dirname(bedgraph),"geneRefCoverage_targetAverage.txt"),bedgraph_tgtonly_avg)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
144 shutil.copy(bedgraph_tgtonly,outdir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
145 shutil.copy(bedgraph_tgtonly_avg,outdir)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
146
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
147 print "----- Processing Files -----"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
148 pool = Pool(5)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
149 pool.map(processInFile,infiles)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
150
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
151
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
152 # STEP 2 -> Create Union BedGraph
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
153 allfiles_names = [x for x in os.listdir(outdir) if x.endswith("TARGETONLY")]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
154 allfiles_path = [os.path.join(outdir,x) for x in allfiles_names]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
155
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
156 args=["unionBedGraphs","-header","-i"]+allfiles_path+["-names"]+allfiles_names
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
157
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
158 print (str(args))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
159 fo = os.path.join(outdir,"TARGETONLY.union.txt")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
160 foh = open(fo,"w")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
161
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
162 subprocess.Popen(args,stdout=foh).wait()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
163 foh.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
164
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
165 # STEP 3 -> POOL METHOD
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
166 TRIM = params.TRIM #TRIM = 0.2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
167 f = fo
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
168 fh = open(f)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
169
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
170 fo=os.path.join(output_dir, params.NAME+".pooled2_TRIM"+str(TRIM)+".txt")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
171 fo_libsize=fo+".LIBSIZE"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
172
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
173
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
174 foh=open(fo,'w')
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
175 fo_libsize_h=open(fo_libsize,'w')
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
176
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
177
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
178 fh.readline()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
179
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
180 Ns = None
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
181 nSamples = None
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
182
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
183 lineCount=0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
184 for line in fh:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
185 lineCount+=1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
186 line_elems=line.rstrip().split("\t")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
187 rangeLen=int(line_elems[2])-int(line_elems[1])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
188 if not Ns:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
189 nSamples=len(line_elems)-3
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
190 Ns = [0 for x in range(nSamples)]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
191 for i in range(nSamples):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
192 ind=i+3
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
193 Ns[i] += int(line_elems[ind])*rangeLen
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
194
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
195
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
196 fh.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
197 fh=open(f)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
198 fh.readline()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
199
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
200
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
201 lineCount=0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
202 nSamples_exclude = int(math.floor(TRIM*nSamples))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
203
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
204 def gm_mean(xs):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
205 tmpprod = 1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
206 p = 1.0/len(xs)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
207 for x in xs:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
208 tmpprod = tmpprod * math.pow(x,p)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
209 return tmpprod
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
210
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
211 Ns_gmean = gm_mean(Ns)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
212
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
213 def meanstdv(x):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
214 from math import sqrt
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
215 n, mean, std = len(x), 0, 0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
216 for a in x:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
217 mean = mean+a
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
218 mean = mean / float(n)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
219 for a in x:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
220 std = std+(a-mean)**2
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
221 std=sqrt(std/float(n-1))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
222 return mean, std
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
223
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
224
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
225 libsize = 0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
226 for line in fh:
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
227 lineCount+=1
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
228 line_elems=line.rstrip().split("\t")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
229 rangeLen=int(line_elems[2])-int(line_elems[1])
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
230 xs_tmp=[int(x) for x in line_elems[3:]]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
231 xs = [float(xs_tmp[i])*Ns_gmean/Ns[i] for i in range(len(xs_tmp))]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
232 xs.sort()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
233 xs_trimmed=xs[nSamples_exclude:(nSamples-nSamples_exclude)]
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
234 #trimmed_mean=sum(xs_trimmed)/float(len(xs_trimmed))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
235 trimmed_mean,std=meanstdv(xs_trimmed)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
236 libsize+=rangeLen*trimmed_mean
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
237 foh.write("\t".join(line_elems[0:3]+[str(trimmed_mean),str(std)])+"\n")
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
238
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
239
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
240 fo_libsize_h.write(str(int(round(libsize))))
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
241
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
242 fh.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
243 foh.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
244 fo_libsize_h.close()
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
245
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
246 def removeTempFolder(tempFolderPath):
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
247 import shutil
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
248
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
249 shutil.rmtree(tempFolderPath)
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
250 print "Temp Folder Removed"
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
251
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
252 # Removed Temp Folder
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
253 removeTempFolder(outdir)