annotate BSseeker2/bs_align/output.py @ 1:8b26adf64adc draft default tip

V2.0.5
author weilong-guo
date Tue, 05 Nov 2013 01:55:39 -0500
parents e6df770c0e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1 try :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
2 import pysam
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
3 except ImportError :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
4 print "[Error] It seems that you haven't install \"pysam\" package.. Please do it before you run this script."
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
5 exit(-1)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
6
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
7 import sys
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
8 from bs_align_utils import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
9
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
10 BAM = 'bam'
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
11 SAM = 'sam'
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
12 BS_SEEKER1 = 'bs_seeker1'
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
13
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
14 formats = [BAM, SAM, BS_SEEKER1]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
15
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
16 class outfile:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
17 def __init__(self, filename, format, chrom_len, cmd_line, suppress_SAM_header):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
18 self.filename = filename
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
19 self.format = format
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
20 self.chrom_ids = dict((k, i) for i, k in enumerate(sorted(chrom_len)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
21
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
22 if format == BS_SEEKER1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
23 self.f = open(filename, 'w')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
24 elif format in [SAM, BAM]:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
25 header = { 'HD' : { 'VN': '1.0'},
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
26 'SQ' : [ {'LN' : chrom_len[c], 'SN' : c} for c in sorted(chrom_len) ],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
27 'PG' : [ { 'ID' : 1, 'PN' : 'BS Seeker 2', 'CL' : cmd_line} ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
28 }
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
29 self.f = pysam.Samfile(filename, 'w' + ('b' if format == BAM else ('' if suppress_SAM_header else 'h')), header = header)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
30
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
31
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
32
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
33 def close(self):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
34 self.f.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
35
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
36 def store(self, qname, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
37 rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
38
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
39 if self.format == BS_SEEKER1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
40
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
41 # remove the soft clipped bases from the read
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
42 # this is done for backwards compatibility with the old format
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
43 r_start, r_end, _ = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
44 original_BS = original_BS[r_start : r_end]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
45
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
46 if rrbs:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
47 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
48 else:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
49 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
50
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
51
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
52 elif self.format == BAM or self.format == SAM:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
53
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
54 a = pysam.AlignedRead()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
55 a.qname = qname
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
56 a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
57 a.flag = 0x10 if strand == '-' else 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
58 a.tid = self.chrom_ids[refname]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
59 a.pos = pos
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
60 a.mapq = 255
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
61 a.cigar = cigar if strand == '+' else list(reversed(cigar))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
62 a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
63 a.pnext = pnext
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
64 a.qual= qual
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
65 if rrbs:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
66 a.tags = (('XO', FR),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
67 ('XS', STEVE),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
68 ('NM', N_mismatch),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
69 ('XM', methy),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
70 ('XG', output_genome),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
71 ('YR', my_region_serial),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
72 ('YS', my_region_start),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
73 ('YE', my_region_end)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
74 )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
75
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
76 else:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
77 a.tags = (('XO', FR),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
78 ('XS', STEVE),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
79 ('NM', N_mismatch),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
80 ('XM', methy),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
81 ('XG', output_genome))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
82
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
83 self.f.write(a)
1
weilong-guo
parents: 0
diff changeset
84
weilong-guo
parents: 0
diff changeset
85
weilong-guo
parents: 0
diff changeset
86 def store2(self, qname, flag, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None,
weilong-guo
parents: 0
diff changeset
87 rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0):
weilong-guo
parents: 0
diff changeset
88
weilong-guo
parents: 0
diff changeset
89 if self.format == BS_SEEKER1:
weilong-guo
parents: 0
diff changeset
90
weilong-guo
parents: 0
diff changeset
91 # remove the soft clipped bases from the read
weilong-guo
parents: 0
diff changeset
92 # this is done for backwards compatibility with the old format
weilong-guo
parents: 0
diff changeset
93 r_start, r_end, _ = get_read_start_end_and_genome_length(cigar)
weilong-guo
parents: 0
diff changeset
94 original_BS = original_BS[r_start : r_end]
weilong-guo
parents: 0
diff changeset
95
weilong-guo
parents: 0
diff changeset
96 if rrbs:
weilong-guo
parents: 0
diff changeset
97 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE))
weilong-guo
parents: 0
diff changeset
98 else:
weilong-guo
parents: 0
diff changeset
99 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE))
weilong-guo
parents: 0
diff changeset
100
weilong-guo
parents: 0
diff changeset
101
weilong-guo
parents: 0
diff changeset
102 elif self.format == BAM or self.format == SAM:
weilong-guo
parents: 0
diff changeset
103
weilong-guo
parents: 0
diff changeset
104 a = pysam.AlignedRead()
weilong-guo
parents: 0
diff changeset
105 a.qname = qname
weilong-guo
parents: 0
diff changeset
106 a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS)
weilong-guo
parents: 0
diff changeset
107 a.flag = flag
weilong-guo
parents: 0
diff changeset
108 a.tid = self.chrom_ids[refname]
weilong-guo
parents: 0
diff changeset
109 a.pos = pos
weilong-guo
parents: 0
diff changeset
110 a.mapq = 255
weilong-guo
parents: 0
diff changeset
111 a.cigar = cigar if strand == '+' else list(reversed(cigar))
weilong-guo
parents: 0
diff changeset
112 a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext]
weilong-guo
parents: 0
diff changeset
113 a.pnext = pnext
weilong-guo
parents: 0
diff changeset
114 a.qual= qual
weilong-guo
parents: 0
diff changeset
115 if rrbs:
weilong-guo
parents: 0
diff changeset
116 a.tags = (('XO', FR),
weilong-guo
parents: 0
diff changeset
117 ('XS', STEVE),
weilong-guo
parents: 0
diff changeset
118 ('NM', N_mismatch),
weilong-guo
parents: 0
diff changeset
119 ('XM', methy),
weilong-guo
parents: 0
diff changeset
120 ('XG', output_genome),
weilong-guo
parents: 0
diff changeset
121 ('YR', my_region_serial),
weilong-guo
parents: 0
diff changeset
122 ('YS', my_region_start),
weilong-guo
parents: 0
diff changeset
123 ('YE', my_region_end)
weilong-guo
parents: 0
diff changeset
124 )
weilong-guo
parents: 0
diff changeset
125
weilong-guo
parents: 0
diff changeset
126 else:
weilong-guo
parents: 0
diff changeset
127 a.tags = (('XO', FR),
weilong-guo
parents: 0
diff changeset
128 ('XS', STEVE),
weilong-guo
parents: 0
diff changeset
129 ('NM', N_mismatch),
weilong-guo
parents: 0
diff changeset
130 ('XM', methy),
weilong-guo
parents: 0
diff changeset
131 ('XG', output_genome))
weilong-guo
parents: 0
diff changeset
132
weilong-guo
parents: 0
diff changeset
133 self.f.write(a)