Mercurial > repos > xuebing > sharplabtool
comparison tools/regVariation/qv_to_bqv.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9071e359b9a3 |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 """ | |
4 Adapted from bx/scripts/qv_to_bqv.py | |
5 | |
6 Convert a qual (qv) file to several BinnedArray files for fast seek. | |
7 This script takes approximately 4 seconds per 1 million base pairs. | |
8 | |
9 The input format is fasta style quality -- fasta headers followed by | |
10 whitespace separated integers. | |
11 | |
12 usage: %prog qual_file output_file | |
13 """ | |
14 | |
15 import pkg_resources | |
16 pkg_resources.require( "bx-python" ) | |
17 pkg_resources.require( "numpy" ) | |
18 import string | |
19 import psyco_full | |
20 import sys, re, os, tempfile | |
21 from bx.binned_array import BinnedArrayWriter | |
22 from bx.cookbook import * | |
23 import fileinput | |
24 | |
25 def load_scores_ba_dir( dir ): | |
26 """ | |
27 Return a dict-like object (keyed by chromosome) that returns | |
28 FileBinnedArray objects created from "key.ba" files in `dir` | |
29 """ | |
30 return FileBinnedArrayDir( dir ) | |
31 | |
32 def main(): | |
33 args = sys.argv[1:] | |
34 try: | |
35 qual_file_dir = args[0] | |
36 #mydir="/home/gua110/Desktop/chimp_quality_scores/chr22.qa" | |
37 mydir="/home/gua110/Desktop/rhesus_quality_scores/rheMac2.qual.qv" | |
38 qual_file_dir = mydir.replace(mydir.split("/")[-1], "") | |
39 output_file = args[ 1 ] | |
40 fo = open(output_file,"w") | |
41 except: | |
42 print "usage: qual_file output_file" | |
43 sys.exit() | |
44 | |
45 tmpfile = tempfile.NamedTemporaryFile() | |
46 cmdline = "ls " + qual_file_dir + "*.qa | cat >> " + tmpfile.name | |
47 os.system (cmdline) | |
48 for qual_file in tmpfile.readlines(): | |
49 qual = fileinput.FileInput( qual_file.strip() ) | |
50 outfile = None | |
51 outbin = None | |
52 base_count = 0 | |
53 mega_count = 0 | |
54 | |
55 for line in qual: | |
56 line = line.rstrip("\r\n") | |
57 if line.startswith(">"): | |
58 # close old | |
59 if outbin and outfile: | |
60 print "\nFinished region " + region + " at " + str(base_count) + " base pairs." | |
61 outbin.finish() | |
62 outfile.close() | |
63 # start new file | |
64 region = line.lstrip(">") | |
65 #outfname = output_file + "." + region + ".bqv" #CHANGED | |
66 outfname = qual_file.strip() + ".bqv" | |
67 print >>fo, "Writing region " + region + " to file " + outfname | |
68 outfile = open( outfname , "wb") | |
69 outbin = BinnedArrayWriter(outfile, typecode='b', default=0) | |
70 base_count = 0 | |
71 mega_count = 0 | |
72 else: | |
73 if outfile and outbin: | |
74 nums = line.split() | |
75 for val in nums: | |
76 outval = int(val) | |
77 assert outval <= 255 and outval >= 0 | |
78 outbin.write(outval) | |
79 base_count += 1 | |
80 if (mega_count * 1000000) <= base_count: | |
81 sys.stdout.write(str(mega_count)+" ") | |
82 sys.stdout.flush() | |
83 mega_count = base_count // 1000000 + 1 | |
84 if outbin and outfile: | |
85 print "\nFinished region " + region + " at " + str(base_count) + " base pairs." | |
86 outbin.finish() | |
87 outfile.close() | |
88 | |
89 if __name__ == "__main__": | |
90 main() |