0
|
1 /*
|
|
2 Copyright (c) 2010-2013 Genome Research Ltd.
|
|
3 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
4
|
|
5 Redistribution and use in source and binary forms, with or without
|
|
6 modification, are permitted provided that the following conditions are met:
|
|
7
|
|
8 1. Redistributions of source code must retain the above copyright notice,
|
|
9 this list of conditions and the following disclaimer.
|
|
10
|
|
11 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
12 this list of conditions and the following disclaimer in the documentation
|
|
13 and/or other materials provided with the distribution.
|
|
14
|
|
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
16 Institute nor the names of its contributors may be used to endorse or promote
|
|
17 products derived from this software without specific prior written permission.
|
|
18
|
|
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
29 */
|
|
30
|
|
31 #include <assert.h>
|
|
32 #include <string.h>
|
|
33 #include <stdlib.h>
|
|
34
|
|
35 #include "cram/cram.h"
|
|
36 #include "htslib/sam.h"
|
|
37
|
|
38 /*---------------------------------------------------------------------------
|
|
39 * Samtools compatibility portion
|
|
40 */
|
|
41 int bam_construct_seq(bam_seq_t **bp, size_t extra_len,
|
|
42 const char *qname, size_t qname_len,
|
|
43 int flag,
|
|
44 int rname, // Ref ID
|
|
45 int pos,
|
|
46 int end, // aligned start/end coords
|
|
47 int mapq,
|
|
48 uint32_t ncigar, const uint32_t *cigar,
|
|
49 int mrnm, // Mate Ref ID
|
|
50 int mpos,
|
|
51 int isize,
|
|
52 int len,
|
|
53 const char *seq,
|
|
54 const char *qual) {
|
|
55 static const char L[256] = {
|
|
56 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
57 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
58 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
59 15,15,15,15,15,15,15,15,15,15,15,15,15, 0,15,15,
|
|
60 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
|
|
61 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
|
|
62 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
|
|
63 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
|
|
64 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
65 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
66 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
67 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
68 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
69 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
70 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
|
|
71 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
|
|
72 };
|
|
73 bam1_t *b = (bam1_t *)*bp;
|
|
74 uint8_t *cp;
|
|
75 int i, bam_len;
|
|
76
|
|
77 //b->l_aux = extra_len; // we fill this out later
|
|
78
|
|
79 bam_len = qname_len + 1 + ncigar*4 + (len+1)/2 + len + extra_len;
|
|
80 if (b->m_data < bam_len) {
|
|
81 b->m_data = bam_len;
|
|
82 kroundup32(b->m_data);
|
|
83 b->data = (uint8_t*)realloc(b->data, b->m_data);
|
|
84 if (!b->data)
|
|
85 return -1;
|
|
86 }
|
|
87 b->l_data = bam_len;
|
|
88
|
|
89 b->core.tid = rname;
|
|
90 b->core.pos = pos-1;
|
|
91 b->core.bin = bam_reg2bin(pos, end);
|
|
92 b->core.qual = mapq;
|
|
93 b->core.l_qname = qname_len+1;
|
|
94 b->core.flag = flag;
|
|
95 b->core.n_cigar = ncigar;
|
|
96 b->core.l_qseq = len;
|
|
97 b->core.mtid = mrnm;
|
|
98 b->core.mpos = mpos-1;
|
|
99 b->core.isize = isize;
|
|
100
|
|
101 cp = b->data;
|
|
102
|
|
103 strncpy((char *)cp, qname, qname_len);
|
|
104 cp[qname_len] = 0;
|
|
105 cp += qname_len+1;
|
|
106 memcpy(cp, cigar, ncigar*4);
|
|
107 cp += ncigar*4;
|
|
108
|
|
109 for (i = 0; i+1 < len; i+=2) {
|
|
110 *cp++ = (L[(uc)seq[i]]<<4) + L[(uc)seq[i+1]];
|
|
111 }
|
|
112 if (i < len)
|
|
113 *cp++ = L[(uc)seq[i]]<<4;
|
|
114
|
|
115 if (qual)
|
|
116 memcpy(cp, qual, len);
|
|
117 else
|
|
118 memset(cp, '\xff', len);
|
|
119
|
|
120 return 0;
|
|
121 }
|
|
122
|
|
123 bam_hdr_t *cram_header_to_bam(SAM_hdr *h) {
|
|
124 int i;
|
|
125 bam_hdr_t *header = bam_hdr_init();
|
|
126
|
|
127 header->l_text = ks_len(&h->text);
|
|
128 header->text = malloc(header->l_text+1);
|
|
129 memcpy(header->text, ks_str(&h->text), header->l_text);
|
|
130 header->text[header->l_text] = 0;
|
|
131
|
|
132 header->n_targets = h->nref;
|
|
133 header->target_name = (char **)calloc(header->n_targets,
|
|
134 sizeof(char *));
|
|
135 header->target_len = (uint32_t *)calloc(header->n_targets, 4);
|
|
136
|
|
137 for (i = 0; i < h->nref; i++) {
|
|
138 header->target_name[i] = strdup(h->ref[i].name);
|
|
139 header->target_len[i] = h->ref[i].len;
|
|
140 }
|
|
141
|
|
142 return header;
|
|
143 }
|
|
144
|
|
145 SAM_hdr *bam_header_to_cram(bam_hdr_t *h) {
|
|
146 return sam_hdr_parse_(h->text, h->l_text);
|
|
147 }
|