Mercurial > repos > lsong10 > psiclass
comparison PsiCLASS-1.0.2/samtools-0.1.19/misc/maq2sam.c @ 0:903fc43d6227 draft default tip
Uploaded
author | lsong10 |
---|---|
date | Fri, 26 Mar 2021 16:52:45 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:903fc43d6227 |
---|---|
1 #include <string.h> | |
2 #include <zlib.h> | |
3 #include <stdio.h> | |
4 #include <inttypes.h> | |
5 #include <stdlib.h> | |
6 #include <assert.h> | |
7 | |
8 #define PACKAGE_VERSION "r439" | |
9 | |
10 //#define MAQ_LONGREADS | |
11 | |
12 #ifdef MAQ_LONGREADS | |
13 # define MAX_READLEN 128 | |
14 #else | |
15 # define MAX_READLEN 64 | |
16 #endif | |
17 | |
18 #define MAX_NAMELEN 36 | |
19 #define MAQMAP_FORMAT_OLD 0 | |
20 #define MAQMAP_FORMAT_NEW -1 | |
21 | |
22 #define PAIRFLAG_FF 0x01 | |
23 #define PAIRFLAG_FR 0x02 | |
24 #define PAIRFLAG_RF 0x04 | |
25 #define PAIRFLAG_RR 0x08 | |
26 #define PAIRFLAG_PAIRED 0x10 | |
27 #define PAIRFLAG_DIFFCHR 0x20 | |
28 #define PAIRFLAG_NOMATCH 0x40 | |
29 #define PAIRFLAG_SW 0x80 | |
30 | |
31 typedef struct | |
32 { | |
33 uint8_t seq[MAX_READLEN]; /* the last base is the single-end mapping quality. */ | |
34 uint8_t size, map_qual, info1, info2, c[2], flag, alt_qual; | |
35 uint32_t seqid, pos; | |
36 int dist; | |
37 char name[MAX_NAMELEN]; | |
38 } maqmap1_t; | |
39 | |
40 typedef struct | |
41 { | |
42 int format, n_ref; | |
43 char **ref_name; | |
44 uint64_t n_mapped_reads; | |
45 maqmap1_t *mapped_reads; | |
46 } maqmap_t; | |
47 | |
48 maqmap_t *maq_new_maqmap() | |
49 { | |
50 maqmap_t *mm = (maqmap_t*)calloc(1, sizeof(maqmap_t)); | |
51 mm->format = MAQMAP_FORMAT_NEW; | |
52 return mm; | |
53 } | |
54 void maq_delete_maqmap(maqmap_t *mm) | |
55 { | |
56 int i; | |
57 if (mm == 0) return; | |
58 for (i = 0; i < mm->n_ref; ++i) | |
59 free(mm->ref_name[i]); | |
60 free(mm->ref_name); | |
61 free(mm->mapped_reads); | |
62 free(mm); | |
63 } | |
64 maqmap_t *maqmap_read_header(gzFile fp) | |
65 { | |
66 maqmap_t *mm; | |
67 int k, len; | |
68 mm = maq_new_maqmap(); | |
69 gzread(fp, &mm->format, sizeof(int)); | |
70 if (mm->format != MAQMAP_FORMAT_NEW) { | |
71 if (mm->format > 0) { | |
72 fprintf(stderr, "** Obsolete map format is detected. Please use 'mapass2maq' command to convert the format.\n"); | |
73 exit(3); | |
74 } | |
75 assert(mm->format == MAQMAP_FORMAT_NEW); | |
76 } | |
77 gzread(fp, &mm->n_ref, sizeof(int)); | |
78 mm->ref_name = (char**)calloc(mm->n_ref, sizeof(char*)); | |
79 for (k = 0; k != mm->n_ref; ++k) { | |
80 gzread(fp, &len, sizeof(int)); | |
81 mm->ref_name[k] = (char*)malloc(len * sizeof(char)); | |
82 gzread(fp, mm->ref_name[k], len); | |
83 } | |
84 /* read number of mapped reads */ | |
85 gzread(fp, &mm->n_mapped_reads, sizeof(uint64_t)); | |
86 return mm; | |
87 } | |
88 | |
89 void maq2tam_core(gzFile fp, const char *rg) | |
90 { | |
91 maqmap_t *mm; | |
92 maqmap1_t mm1, *m1; | |
93 int ret; | |
94 m1 = &mm1; | |
95 mm = maqmap_read_header(fp); | |
96 while ((ret = gzread(fp, m1, sizeof(maqmap1_t))) == sizeof(maqmap1_t)) { | |
97 int j, flag = 0, se_mapq = m1->seq[MAX_READLEN-1]; | |
98 if (m1->flag) flag |= 1; | |
99 if ((m1->flag&PAIRFLAG_PAIRED) || ((m1->flag&PAIRFLAG_SW) && m1->flag != 192)) flag |= 2; | |
100 if (m1->flag == 192) flag |= 4; | |
101 if (m1->flag == 64) flag |= 8; | |
102 if (m1->pos&1) flag |= 0x10; | |
103 if ((flag&1) && m1->dist != 0) { | |
104 int c; | |
105 if (m1->dist > 0) { | |
106 if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_RF)) c = 0; | |
107 else if (m1->flag&(PAIRFLAG_FR|PAIRFLAG_RR)) c = 1; | |
108 else c = m1->pos&1; | |
109 } else { | |
110 if (m1->flag&(PAIRFLAG_FF|PAIRFLAG_FR)) c = 0; | |
111 else if (m1->flag&(PAIRFLAG_RF|PAIRFLAG_RR)) c = 1; | |
112 else c = m1->pos&1; | |
113 } | |
114 if (c) flag |= 0x20; | |
115 } | |
116 if (m1->flag) { | |
117 int l = strlen(m1->name); | |
118 if (m1->name[l-2] == '/') { | |
119 flag |= (m1->name[l-1] == '1')? 0x40 : 0x80; | |
120 m1->name[l-2] = '\0'; | |
121 } | |
122 } | |
123 printf("%s\t%d\t", m1->name, flag); | |
124 printf("%s\t%d\t", mm->ref_name[m1->seqid], (m1->pos>>1)+1); | |
125 if (m1->flag == 130) { | |
126 int c = (int8_t)m1->seq[MAX_READLEN-1]; | |
127 printf("%d\t", m1->alt_qual); | |
128 if (c == 0) printf("%dM\t", m1->size); | |
129 else { | |
130 if (c > 0) printf("%dM%dI%dM\t", m1->map_qual, c, m1->size - m1->map_qual - c); | |
131 else printf("%dM%dD%dM\t", m1->map_qual, -c, m1->size - m1->map_qual); | |
132 } | |
133 se_mapq = 0; // zero SE mapQ for reads aligned by SW | |
134 } else { | |
135 if (flag&4) printf("0\t*\t"); | |
136 else printf("%d\t%dM\t", m1->map_qual, m1->size); | |
137 } | |
138 printf("*\t0\t%d\t", m1->dist); | |
139 for (j = 0; j != m1->size; ++j) { | |
140 if (m1->seq[j] == 0) putchar('N'); | |
141 else putchar("ACGT"[m1->seq[j]>>6&3]); | |
142 } | |
143 putchar('\t'); | |
144 for (j = 0; j != m1->size; ++j) | |
145 putchar((m1->seq[j]&0x3f) + 33); | |
146 putchar('\t'); | |
147 if (rg) printf("RG:Z:%s\t", rg); | |
148 if (flag&4) { // unmapped | |
149 printf("MF:i:%d\n", m1->flag); | |
150 } else { | |
151 printf("MF:i:%d\t", m1->flag); | |
152 if (m1->flag) printf("AM:i:%d\tSM:i:%d\t", m1->alt_qual, se_mapq); | |
153 printf("NM:i:%d\tUQ:i:%d\tH0:i:%d\tH1:i:%d\n", m1->info1&0xf, m1->info2, m1->c[0], m1->c[1]); | |
154 } | |
155 } | |
156 if (ret > 0) | |
157 fprintf(stderr, "Truncated! Continue anyway.\n"); | |
158 maq_delete_maqmap(mm); | |
159 } | |
160 | |
161 int main(int argc, char *argv[]) | |
162 { | |
163 gzFile fp; | |
164 if (argc == 1) { | |
165 fprintf(stderr, "Version: %s\n", PACKAGE_VERSION); | |
166 fprintf(stderr, "Usage: maq2sam <in.map> [<readGroup>]\n"); | |
167 return 1; | |
168 } | |
169 fp = strcmp(argv[1], "-")? gzopen(argv[1], "r") : gzdopen(fileno(stdin), "r"); | |
170 maq2tam_core(fp, argc > 2? argv[2] : 0); | |
171 gzclose(fp); | |
172 return 0; | |
173 } |