0
|
1 try :
|
|
2 import pysam
|
|
3 except ImportError :
|
|
4 print "[Error] It seems that you haven't install \"pysam\" package.. Please do it before you run this script."
|
|
5 exit(-1)
|
|
6
|
|
7 import sys
|
|
8 from bs_align_utils import *
|
|
9
|
|
10 BAM = 'bam'
|
|
11 SAM = 'sam'
|
|
12 BS_SEEKER1 = 'bs_seeker1'
|
|
13
|
|
14 formats = [BAM, SAM, BS_SEEKER1]
|
|
15
|
|
16 class outfile:
|
|
17 def __init__(self, filename, format, chrom_len, cmd_line, suppress_SAM_header):
|
|
18 self.filename = filename
|
|
19 self.format = format
|
|
20 self.chrom_ids = dict((k, i) for i, k in enumerate(sorted(chrom_len)))
|
|
21
|
|
22 if format == BS_SEEKER1:
|
|
23 self.f = open(filename, 'w')
|
|
24 elif format in [SAM, BAM]:
|
|
25 header = { 'HD' : { 'VN': '1.0'},
|
|
26 'SQ' : [ {'LN' : chrom_len[c], 'SN' : c} for c in sorted(chrom_len) ],
|
|
27 'PG' : [ { 'ID' : 1, 'PN' : 'BS Seeker 2', 'CL' : cmd_line} ]
|
|
28 }
|
|
29 self.f = pysam.Samfile(filename, 'w' + ('b' if format == BAM else ('' if suppress_SAM_header else 'h')), header = header)
|
|
30
|
|
31
|
|
32
|
|
33 def close(self):
|
|
34 self.f.close()
|
|
35
|
|
36 def store(self, qname, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None,
|
|
37 rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0):
|
|
38
|
|
39 if self.format == BS_SEEKER1:
|
|
40
|
|
41 # remove the soft clipped bases from the read
|
|
42 # this is done for backwards compatibility with the old format
|
|
43 r_start, r_end, _ = get_read_start_end_and_genome_length(cigar)
|
|
44 original_BS = original_BS[r_start : r_end]
|
|
45
|
|
46 if rrbs:
|
|
47 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE))
|
|
48 else:
|
|
49 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE))
|
|
50
|
|
51
|
|
52 elif self.format == BAM or self.format == SAM:
|
|
53
|
|
54 a = pysam.AlignedRead()
|
|
55 a.qname = qname
|
|
56 a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS)
|
|
57 a.flag = 0x10 if strand == '-' else 0
|
|
58 a.tid = self.chrom_ids[refname]
|
|
59 a.pos = pos
|
|
60 a.mapq = 255
|
|
61 a.cigar = cigar if strand == '+' else list(reversed(cigar))
|
|
62 a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext]
|
|
63 a.pnext = pnext
|
|
64 a.qual= qual
|
|
65 if rrbs:
|
|
66 a.tags = (('XO', FR),
|
|
67 ('XS', STEVE),
|
|
68 ('NM', N_mismatch),
|
|
69 ('XM', methy),
|
|
70 ('XG', output_genome),
|
|
71 ('YR', my_region_serial),
|
|
72 ('YS', my_region_start),
|
|
73 ('YE', my_region_end)
|
|
74 )
|
|
75
|
|
76 else:
|
|
77 a.tags = (('XO', FR),
|
|
78 ('XS', STEVE),
|
|
79 ('NM', N_mismatch),
|
|
80 ('XM', methy),
|
|
81 ('XG', output_genome))
|
|
82
|
|
83 self.f.write(a)
|
1
|
84
|
|
85
|
|
86 def store2(self, qname, flag, N_mismatch, FR, refname, strand, pos, cigar, original_BS, methy, STEVE, rnext = -1, pnext = -1, qual = None, output_genome = None,
|
|
87 rrbs = False, my_region_serial = -1, my_region_start = 0, my_region_end = 0):
|
|
88
|
|
89 if self.format == BS_SEEKER1:
|
|
90
|
|
91 # remove the soft clipped bases from the read
|
|
92 # this is done for backwards compatibility with the old format
|
|
93 r_start, r_end, _ = get_read_start_end_and_genome_length(cigar)
|
|
94 original_BS = original_BS[r_start : r_end]
|
|
95
|
|
96 if rrbs:
|
|
97 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, STEVE))
|
|
98 else:
|
|
99 self.f.write('%s\t%2d\t%s\t%s%s%s\t%s\t%s\t%s\t%d\t%d\t%d\t%d\n' % (qname, N_mismatch, FR, refname, strand, str(pos+1).zfill(10), output_genome, original_BS, methy, my_region_serial, my_region_start, my_region_end, STEVE))
|
|
100
|
|
101
|
|
102 elif self.format == BAM or self.format == SAM:
|
|
103
|
|
104 a = pysam.AlignedRead()
|
|
105 a.qname = qname
|
|
106 a.seq = original_BS if strand == '+' else reverse_compl_seq(original_BS)
|
|
107 a.flag = flag
|
|
108 a.tid = self.chrom_ids[refname]
|
|
109 a.pos = pos
|
|
110 a.mapq = 255
|
|
111 a.cigar = cigar if strand == '+' else list(reversed(cigar))
|
|
112 a.rnext = rnext if rnext == -1 else self.chrom_ids[rnext]
|
|
113 a.pnext = pnext
|
|
114 a.qual= qual
|
|
115 if rrbs:
|
|
116 a.tags = (('XO', FR),
|
|
117 ('XS', STEVE),
|
|
118 ('NM', N_mismatch),
|
|
119 ('XM', methy),
|
|
120 ('XG', output_genome),
|
|
121 ('YR', my_region_serial),
|
|
122 ('YS', my_region_start),
|
|
123 ('YE', my_region_end)
|
|
124 )
|
|
125
|
|
126 else:
|
|
127 a.tags = (('XO', FR),
|
|
128 ('XS', STEVE),
|
|
129 ('NM', N_mismatch),
|
|
130 ('XM', methy),
|
|
131 ('XG', output_genome))
|
|
132
|
|
133 self.f.write(a)
|