annotate PsiCLASS-1.0.2/samtools-0.1.19/bam_mate.c @ 0:903fc43d6227 draft default tip

Uploaded
author lsong10
date Fri, 26 Mar 2021 16:52:45 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
1 #include <stdlib.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
2 #include <string.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
3 #include <unistd.h>
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
4 #include "kstring.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
5 #include "bam.h"
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
6
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
7 void bam_template_cigar(bam1_t *b1, bam1_t *b2, kstring_t *str)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
8 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
9 bam1_t *swap;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
10 int i, end;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
11 uint32_t *cigar;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
12 str->l = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
13 if (b1->core.tid != b2->core.tid || b1->core.tid < 0) return; // coordinateless or not on the same chr; skip
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
14 if (b1->core.pos > b2->core.pos) swap = b1, b1 = b2, b2 = swap; // make sure b1 has a smaller coordinate
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
15 kputc((b1->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
16 kputc((b1->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
17 for (i = 0, cigar = bam1_cigar(b1); i < b1->core.n_cigar; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
18 kputw(bam_cigar_oplen(cigar[i]), str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
19 kputc(bam_cigar_opchr(cigar[i]), str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
20 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
21 end = bam_calend(&b1->core, cigar);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
22 kputw(b2->core.pos - end, str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
23 kputc('T', str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
24 kputc((b2->core.flag & BAM_FREAD1)? '1' : '2', str); // segment index
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
25 kputc((b2->core.flag & BAM_FREVERSE)? 'R' : 'F', str); // strand
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
26 for (i = 0, cigar = bam1_cigar(b2); i < b2->core.n_cigar; ++i) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
27 kputw(bam_cigar_oplen(cigar[i]), str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
28 kputc(bam_cigar_opchr(cigar[i]), str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
29 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
30 bam_aux_append(b1, "CT", 'Z', str->l+1, (uint8_t*)str->s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
31 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
32
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
33 // currently, this function ONLY works if each read has one hit
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
34 void bam_mating_core(bamFile in, bamFile out, int remove_reads)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
35 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
36 bam_header_t *header;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
37 bam1_t *b[2];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
38 int curr, has_prev, pre_end = 0, cur_end;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
39 kstring_t str;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
40
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
41 str.l = str.m = 0; str.s = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
42 header = bam_header_read(in);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
43 bam_header_write(out, header);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
44
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
45 b[0] = bam_init1();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
46 b[1] = bam_init1();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
47 curr = 0; has_prev = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
48 while (bam_read1(in, b[curr]) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
49 bam1_t *cur = b[curr], *pre = b[1-curr];
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
50 if (cur->core.tid < 0)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
51 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
52 if ( !remove_reads ) bam_write1(out, cur);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
53 continue;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
54 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
55 cur_end = bam_calend(&cur->core, bam1_cigar(cur));
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
56 if (cur_end > (int)header->target_len[cur->core.tid]) cur->core.flag |= BAM_FUNMAP;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
57 if (cur->core.flag & BAM_FSECONDARY)
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
58 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
59 if ( !remove_reads ) bam_write1(out, cur);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
60 continue; // skip secondary alignments
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
61 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
62 if (has_prev) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
63 if (strcmp(bam1_qname(cur), bam1_qname(pre)) == 0) { // identical pair name
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
64 cur->core.mtid = pre->core.tid; cur->core.mpos = pre->core.pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
65 pre->core.mtid = cur->core.tid; pre->core.mpos = cur->core.pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
66 if (pre->core.tid == cur->core.tid && !(cur->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
67 && !(pre->core.flag&(BAM_FUNMAP|BAM_FMUNMAP))) // set TLEN/ISIZE
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
68 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
69 uint32_t cur5, pre5;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
70 cur5 = (cur->core.flag&BAM_FREVERSE)? cur_end : cur->core.pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
71 pre5 = (pre->core.flag&BAM_FREVERSE)? pre_end : pre->core.pos;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
72 cur->core.isize = pre5 - cur5; pre->core.isize = cur5 - pre5;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
73 } else cur->core.isize = pre->core.isize = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
74 if (pre->core.flag&BAM_FREVERSE) cur->core.flag |= BAM_FMREVERSE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
75 else cur->core.flag &= ~BAM_FMREVERSE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
76 if (cur->core.flag&BAM_FREVERSE) pre->core.flag |= BAM_FMREVERSE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
77 else pre->core.flag &= ~BAM_FMREVERSE;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
78 if (cur->core.flag & BAM_FUNMAP) { pre->core.flag |= BAM_FMUNMAP; pre->core.flag &= ~BAM_FPROPER_PAIR; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
79 if (pre->core.flag & BAM_FUNMAP) { cur->core.flag |= BAM_FMUNMAP; cur->core.flag &= ~BAM_FPROPER_PAIR; }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
80 bam_template_cigar(pre, cur, &str);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
81 bam_write1(out, pre);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
82 bam_write1(out, cur);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
83 has_prev = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
84 } else { // unpaired or singleton
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
85 pre->core.mtid = -1; pre->core.mpos = -1; pre->core.isize = 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
86 if (pre->core.flag & BAM_FPAIRED) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
87 pre->core.flag |= BAM_FMUNMAP;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
88 pre->core.flag &= ~BAM_FMREVERSE & ~BAM_FPROPER_PAIR;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
89 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
90 bam_write1(out, pre);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
91 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
92 } else has_prev = 1;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
93 curr = 1 - curr;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
94 pre_end = cur_end;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
95 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
96 if (has_prev) bam_write1(out, b[1-curr]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
97 bam_header_destroy(header);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
98 bam_destroy1(b[0]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
99 bam_destroy1(b[1]);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
100 free(str.s);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
101 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
102
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
103 void usage()
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
104 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
105 fprintf(stderr,"Usage: samtools fixmate <in.nameSrt.bam> <out.nameSrt.bam>\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
106 fprintf(stderr,"Options:\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
107 fprintf(stderr," -r remove unmapped reads and secondary alignments\n");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
108 exit(1);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
109 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
110
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
111 int bam_mating(int argc, char *argv[])
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
112 {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
113 bamFile in, out;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
114 int c, remove_reads=0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
115 while ((c = getopt(argc, argv, "r")) >= 0) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
116 switch (c) {
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
117 case 'r': remove_reads=1; break;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
118 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
119 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
120 if (optind+1 >= argc) usage();
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
121 in = (strcmp(argv[optind], "-") == 0)? bam_dopen(fileno(stdin), "r") : bam_open(argv[optind], "r");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
122 out = (strcmp(argv[optind+1], "-") == 0)? bam_dopen(fileno(stdout), "w") : bam_open(argv[optind+1], "w");
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
123 bam_mating_core(in, out, remove_reads);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
124 bam_close(in); bam_close(out);
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
125 return 0;
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
126 }
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
127
903fc43d6227 Uploaded
lsong10
parents:
diff changeset
128