annotate ezBAMQC/src/htslib/cram/cram_samtools.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
1 /*
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
2 Copyright (c) 2010-2013 Genome Research Ltd.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
3 Author: James Bonfield <jkb@sanger.ac.uk>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
4
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
5 Redistribution and use in source and binary forms, with or without
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
6 modification, are permitted provided that the following conditions are met:
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
7
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
8 1. Redistributions of source code must retain the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
9 this list of conditions and the following disclaimer.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
10
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
11 2. Redistributions in binary form must reproduce the above copyright notice,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
12 this list of conditions and the following disclaimer in the documentation
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
13 and/or other materials provided with the distribution.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
14
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
15 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
16 Institute nor the names of its contributors may be used to endorse or promote
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
17 products derived from this software without specific prior written permission.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
18
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
19 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
20 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
21 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
22 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
23 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
24 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
25 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
26 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
27 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
29 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
30
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
31 #include <assert.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
32 #include <string.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
33 #include <stdlib.h>
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
34
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
35 #include "cram/cram.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
36 #include "htslib/sam.h"
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
37
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
38 /*---------------------------------------------------------------------------
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
39 * Samtools compatibility portion
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
40 */
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
41 int bam_construct_seq(bam_seq_t **bp, size_t extra_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
42 const char *qname, size_t qname_len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
43 int flag,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
44 int rname, // Ref ID
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
45 int pos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
46 int end, // aligned start/end coords
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
47 int mapq,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
48 uint32_t ncigar, const uint32_t *cigar,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
49 int mrnm, // Mate Ref ID
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
50 int mpos,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
51 int isize,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
52 int len,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
53 const char *seq,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
54 const char *qual) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
55 static const char L[256] = {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
56 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
57 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
58 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
59 15,15,15,15,15,15,15,15,15,15,15,15,15, 0,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
60 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
61 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
62 15, 1,14, 2,13,15,15, 4,11,15,15,12,15, 3,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
63 15,15, 5, 6, 8,15, 7, 9,15,10,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
64 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
65 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
66 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
67 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
68 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
69 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
70 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
71 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
72 };
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
73 bam1_t *b = (bam1_t *)*bp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
74 uint8_t *cp;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
75 int i, bam_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
76
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
77 //b->l_aux = extra_len; // we fill this out later
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
78
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
79 bam_len = qname_len + 1 + ncigar*4 + (len+1)/2 + len + extra_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
80 if (b->m_data < bam_len) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
81 b->m_data = bam_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
82 kroundup32(b->m_data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
83 b->data = (uint8_t*)realloc(b->data, b->m_data);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
84 if (!b->data)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
85 return -1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
86 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
87 b->l_data = bam_len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
88
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
89 b->core.tid = rname;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
90 b->core.pos = pos-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
91 b->core.bin = bam_reg2bin(pos, end);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
92 b->core.qual = mapq;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
93 b->core.l_qname = qname_len+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
94 b->core.flag = flag;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
95 b->core.n_cigar = ncigar;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
96 b->core.l_qseq = len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
97 b->core.mtid = mrnm;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
98 b->core.mpos = mpos-1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
99 b->core.isize = isize;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
100
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
101 cp = b->data;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
102
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
103 strncpy((char *)cp, qname, qname_len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
104 cp[qname_len] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
105 cp += qname_len+1;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
106 memcpy(cp, cigar, ncigar*4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
107 cp += ncigar*4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
108
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
109 for (i = 0; i+1 < len; i+=2) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
110 *cp++ = (L[(uc)seq[i]]<<4) + L[(uc)seq[i+1]];
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
111 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
112 if (i < len)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
113 *cp++ = L[(uc)seq[i]]<<4;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
114
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
115 if (qual)
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
116 memcpy(cp, qual, len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
117 else
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
118 memset(cp, '\xff', len);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
119
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
120 return 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
121 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
122
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
123 bam_hdr_t *cram_header_to_bam(SAM_hdr *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
124 int i;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
125 bam_hdr_t *header = bam_hdr_init();
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
126
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
127 header->l_text = ks_len(&h->text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
128 header->text = malloc(header->l_text+1);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
129 memcpy(header->text, ks_str(&h->text), header->l_text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
130 header->text[header->l_text] = 0;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
131
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
132 header->n_targets = h->nref;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
133 header->target_name = (char **)calloc(header->n_targets,
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
134 sizeof(char *));
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
135 header->target_len = (uint32_t *)calloc(header->n_targets, 4);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
136
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
137 for (i = 0; i < h->nref; i++) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
138 header->target_name[i] = strdup(h->ref[i].name);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
139 header->target_len[i] = h->ref[i].len;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
140 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
141
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
142 return header;
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
143 }
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
144
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
145 SAM_hdr *bam_header_to_cram(bam_hdr_t *h) {
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
146 return sam_hdr_parse_(h->text, h->l_text);
dfa3745e5fd8 Uploaded
youngkim
parents:
diff changeset
147 }