annotate PsiCLASS-1.0.2/samtools-0.1.19/misc/maq2sam.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <zlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #include <stdio.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include <inttypes.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6 #include <assert.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 #define PACKAGE_VERSION "r439"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 //#define MAQ_LONGREADS
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 #ifdef MAQ_LONGREADS
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 # define MAX_READLEN 128
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 #else
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 # define MAX_READLEN 64
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 #endif
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 #define MAX_NAMELEN 36
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 #define MAQMAP_FORMAT_OLD 0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 #define MAQMAP_FORMAT_NEW -1
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 #define PAIRFLAG_FF 0x01
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 #define PAIRFLAG_FR 0x02
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 #define PAIRFLAG_RF 0x04
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 #define PAIRFLAG_RR 0x08
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 #define PAIRFLAG_PAIRED 0x10
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 #define PAIRFLAG_DIFFCHR 0x20
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 #define PAIRFLAG_NOMATCH 0x40
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 #define PAIRFLAG_SW 0x80
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 typedef struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 uint8_t seq[MAX_READLEN]; /* the last base is the single-end mapping quality. */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 uint8_t size, map_qual, info1, info2, c[2], flag, alt_qual;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 uint32_t seqid, pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 int dist;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 char name[MAX_NAMELEN];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 } maqmap1_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40 typedef struct
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 int format, n_ref;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 char **ref_name;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44 uint64_t n_mapped_reads;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 maqmap1_t *mapped_reads;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 } maqmap_t;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 maqmap_t *maq_new_maqmap()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 mm->format = MAQMAP_FORMAT_NEW;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 return mm;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 void maq_delete_maqmap(maqmap_t *mm)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 int i;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 if (mm == 0) return;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 for (i = 0; i < mm->n_ref; ++i)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 free(mm->ref_name[i]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 free(mm->ref_name);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 free(mm->mapped_reads);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 free(mm);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 maqmap_t *maqmap_read_header(gzFile fp)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 maqmap_t *mm;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 int k, len;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 mm = maq_new_maqmap();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 gzread(fp, &mm->format, sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 if (mm->format != MAQMAP_FORMAT_NEW) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 if (mm->format > 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 exit(3);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 assert(mm->format == MAQMAP_FORMAT_NEW);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 gzread(fp, &mm->n_ref, sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 for (k = 0; k != mm->n_ref; ++k) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 gzread(fp, &len, sizeof(int));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 mm->ref_name[k] = (char*)malloc(len * sizeof(char));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 gzread(fp, mm->ref_name[k], len);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 /* read number of mapped reads */
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 gzread(fp, &mm->n_mapped_reads, sizeof(uint64_t));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 return mm;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 void maq2tam_core(gzFile fp, const char *rg)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 maqmap_t *mm;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 maqmap1_t mm1, *m1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 int ret;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 m1 = &mm1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 mm = maqmap_read_header(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 while ((ret = gzread(fp, m1, sizeof(maqmap1_t))) == sizeof(maqmap1_t)) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 int j, flag = 0, se_mapq = m1->seq[MAX_READLEN-1];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 if (m1->flag) flag |= 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 if ((m1->flag&PAIRFLAG_PAIRED) || ((m1->flag&PAIRFLAG_SW) && m1->flag != 192)) flag |= 2;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 if (m1->flag == 192) flag |= 4;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 if (m1->flag == 64) flag |= 8;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102 if (m1->pos&1) flag |= 0x10;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 if ((flag&1) && m1->dist != 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 int c;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 if (m1->dist > 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_RF)) c = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 else if (m1->flag&(PAIRFLAG_FR|PAIRFLAG_RR)) c = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 else c = m1->pos&1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110 if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_FR)) c = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 else if (m1->flag&(PAIRFLAG_RF|PAIRFLAG_RR)) c = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 else c = m1->pos&1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 if (c) flag |= 0x20;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 if (m1->flag) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 int l = strlen(m1->name);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 if (m1->name[l-2] == '/') {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 flag |= (m1->name[l-1] == '1')? 0x40 : 0x80;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 m1->name[l-2] = '\0';
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 printf("%s\t%d\t", m1->name, flag);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 printf("%s\t%d\t", mm->ref_name[m1->seqid], (m1->pos>>1)+1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 if (m1->flag == 130) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 int c = (int8_t)m1->seq[MAX_READLEN-1];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127 printf("%d\t", m1->alt_qual);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128 if (c == 0) printf("%dM\t", m1->size);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
129 else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
130 if (c > 0) printf("%dM%dI%dM\t", m1->map_qual, c, m1->size - m1->map_qual - c);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
131 else printf("%dM%dD%dM\t", m1->map_qual, -c, m1->size - m1->map_qual);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
132 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
133 se_mapq = 0; // zero SE mapQ for reads aligned by SW
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
134 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
135 if (flag&4) printf("0\t*\t");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
136 else printf("%d\t%dM\t", m1->map_qual, m1->size);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
137 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
138 printf("*\t0\t%d\t", m1->dist);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
139 for (j = 0; j != m1->size; ++j) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
140 if (m1->seq[j] == 0) putchar('N');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
141 else putchar("ACGT"[m1->seq[j]>>6&3]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
142 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
143 putchar('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
144 for (j = 0; j != m1->size; ++j)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
145 putchar((m1->seq[j]&0x3f) + 33);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
146 putchar('\t');
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
147 if (rg) printf("RG:Z:%s\t", rg);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
148 if (flag&4) { // unmapped
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
149 printf("MF:i:%d\n", m1->flag);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
150 } else {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
151 printf("MF:i:%d\t", m1->flag);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
152 if (m1->flag) printf("AM:i:%d\tSM:i:%d\t", m1->alt_qual, se_mapq);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
153 printf("NM:i:%d\tUQ:i:%d\tH0:i:%d\tH1:i:%d\n", m1->info1&0xf, m1->info2, m1->c[0], m1->c[1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
154 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
155 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
156 if (ret > 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
157 fprintf(stderr, "Truncated! Continue anyway.\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
158 maq_delete_maqmap(mm);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
159 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
160
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
161 int main(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
162 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
163 gzFile fp;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
164 if (argc == 1) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
165 fprintf(stderr, "Version: %s\n", PACKAGE_VERSION);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
166 fprintf(stderr, "Usage: maq2sam <in.map> [<readGroup>]\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
167 return 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
168 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
169 fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
170 maq2tam_core(fp, argc > 2? argv[2] : 0);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
171 gzclose(fp);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
172 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
173 }