Mercurial > repos > artbio > lumpy_sv
diff pairend_distro.py @ 1:1ed8619a5611 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f
author | artbio |
---|---|
date | Wed, 26 Jul 2017 18:17:01 -0400 |
parents | 796552c157de |
children | 093bb151a0a8 |
line wrap: on
line diff
```diff
--- a/pairend_distro.py Mon Jul 24 08:03:17 2017 -0400
+++ b/pairend_distro.py Wed Jul 26 18:17:01 2017 -0400
@@ -9,9 +9,9 @@
 # rl6sf@virginia.edu

 import sys
+from optparse import OptionParser
+
 import numpy as np
-from operator import itemgetter
-from optparse import OptionParser

 # some constants for sam/bam field ids
 SAM_FLAG = 1
@@ -20,32 +20,16 @@
 SAM_ISIZE = 8

 parser = OptionParser()
-
-parser.add_option("-r",
-                  "--read_length",
-                  type="int",
-                  dest="read_length",
-                  help="Read length")
-
-parser.add_option("-X",
-                  dest="X",
-                  type="int",
-                  help="Number of stdevs from mean to extend")
+parser.add_option("-r", "--read_length", type="int", dest="read_length",
+                  help="Read length")
+parser.add_option("-X", dest="X", type="int",
+                  help="Number of stdevs from mean to extend")
+parser.add_option("-N", dest="N", type="int", help="Number to sample")
+parser.add_option("-o", dest="output_file", help="Output file")
+parser.add_option("-m", dest="mads", type="int", default=10,
+                  help='''Outlier cutoff in # of median absolute deviations
+                  (unscaled, upper only)''')

-parser.add_option("-N",
-                  dest="N",
-                  type="int",
-                  help="Number to sample")
-
-parser.add_option("-o",
-                  dest="output_file",
-                  help="Output file")
-
-parser.add_option("-m",
-                  dest="mads",
-                  type="int",
-                  default=10,
-                  help="Outlier cutoff in # of median absolute deviations (unscaled, upper only)")

 def unscaled_upper_mad(xs):
     """Return a tuple consisting of the median of xs followed by the
@@ -96,7 +80,8 @@
 # warn if very few elements in distribution
 min_elements = 1000
 if len(L) < min_elements:
-    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" % (len(L), min_elements))
+    sys.stderr.write("Warning: only %s elements in distribution (min: %s)\n" %
+                     (len(L), min_elements))
     mean = "NA"
     stdev = "NA"

@@ -110,7 +95,7 @@
     new_len = len(L)
     removed = c - new_len
     sys.stderr.write("Removed %d outliers with isize >= %d\n" %
-        (removed, upper_cutoff))
+                     (removed, upper_cutoff))
     c = new_len

     mean = np.mean(L)
@@ -125,7 +110,7 @@
     for x in L:
         if (x >= start) and (x <= end):
             j = int(x - start)
-            H[j] = H[ int(x - start) ] + 1
+            H[j] = H[int(x - start)] + 1
             s += 1

     f = open(options.output_file, 'w')
@@ -133,8 +118,5 @@
     for i in range(end - start):
         o = str(i) + "\t" + str(float(H[i])/float(s)) + "\n"
         f.write(o)
-
-
     f.close()
-
 print('mean:' + str(mean) + '\tstdev:' + str(stdev))
```