0
|
1 /* The MIT License
|
|
2
|
|
3 Copyright (c) 2003-2006, 2008, by Heng Li <lh3lh3@gmail.com>
|
|
4
|
|
5 Permission is hereby granted, free of charge, to any person obtaining
|
|
6 a copy of this software and associated documentation files (the
|
|
7 "Software"), to deal in the Software without restriction, including
|
|
8 without limitation the rights to use, copy, modify, merge, publish,
|
|
9 distribute, sublicense, and/or sell copies of the Software, and to
|
|
10 permit persons to whom the Software is furnished to do so, subject to
|
|
11 the following conditions:
|
|
12
|
|
13 The above copyright notice and this permission notice shall be
|
|
14 included in all copies or substantial portions of the Software.
|
|
15
|
|
16 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
17 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
18 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
19 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
20 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
21 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
22 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
23 SOFTWARE.
|
|
24 */
|
|
25
|
|
26 /*
|
|
27 2009-07-23, 0.10.0
|
|
28
|
|
29 - Use 32-bit to store CIGAR
|
|
30
|
|
31 - Report suboptimal alignments
|
|
32
|
|
33 - Implemented half-fixed-half-open DP
|
|
34
|
|
35 2009-04-26, 0.9.10
|
|
36
|
|
37 - Allow to set a threshold for local alignment
|
|
38
|
|
39 2009-02-18, 0.9.9
|
|
40
|
|
41 - Fixed a bug when no residue matches
|
|
42
|
|
43 2008-08-04, 0.9.8
|
|
44
|
|
45 - Fixed the wrong declaration of aln_stdaln_aux()
|
|
46
|
|
47 - Avoid 0 coordinate for global alignment
|
|
48
|
|
49 2008-08-01, 0.9.7
|
|
50
|
|
51 - Change gap_end penalty to 5 in aln_param_bwa
|
|
52
|
|
53 - Add function to convert path_t to the CIGAR format
|
|
54
|
|
55 2008-08-01, 0.9.6
|
|
56
|
|
57 - The first gap now costs (gap_open+gap_ext), instead of
|
|
58 gap_open. Scoring systems are modified accordingly.
|
|
59
|
|
60 - Gap end is now correctly handled. Previously it was handled incorrectly.
|
|
61
|
|
62 - Change license to MIT.
|
|
63
|
|
64 */
|
|
65
|
|
66 #ifndef LH3_STDALN_H_
|
|
67 #define LH3_STDALN_H_
|
|
68
|
|
69
|
|
/*
 * Library version.
 *
 * The macro expands to the bare preprocessing number 0.11.0, which is neither
 * a valid numeric constant nor a string literal. It can therefore only be
 * consumed through the preprocessor (for example by stringifying it with the
 * # operator); using it directly in an expression does not compile.
 *
 * NOTE(review): the newest entry in the change log at the top of this file is
 * 0.10.0 -- confirm which version number is the intended one.
 */
#define STDALN_VERSION 0.11.0
|
|
71
|
|
72 #include <stdint.h>
|
|
73
|
|
/*
 * FROM_* codes. Their use is not visible in this header.
 * NOTE(review): presumably they record which dynamic-programming state a step
 * came from (M = match, I = insertion, D = deletion; meaning of S unknown)
 * and are the values stored in path_t.ctype -- confirm against the
 * implementation.
 */
#define FROM_M 0
#define FROM_I 1
#define FROM_D 2
#define FROM_S 3

/*
 * Alignment modes.
 * NOTE(review): presumably the values accepted by the `type` argument of
 * aln_stdaln() and aln_stdaln_aux() -- confirm against the implementation.
 */
#define ALN_TYPE_LOCAL 0
#define ALN_TYPE_GLOBAL 1
#define ALN_TYPE_EXTEND 2
|
|
82
|
|
/*
 * Large negative sentinel, equal to -(2^30 - 1).
 *
 * This is NOT the smallest representable int: the sum of two MINOR_INF values
 * (-2147483646) still fits in a 32-bit int.
 * NOTE(review): presumably used as "minus infinity" for alignment scores, with
 * the headroom chosen so penalties can be added without overflow -- confirm.
 *
 * The expansion is parenthesized so the macro always acts as a single operand.
 */
#define MINOR_INF (-1073741823)
|
|
85
|
|
/*
 * Scoring parameters for an alignment.
 *
 * Member order is part of the interface: the predefined parameter sets are
 * written as positional initializers of the form
 * { gap_open, gap_ext, gap_end, matrix, row, band_width }.
 */
typedef struct {
    int gap_open;   /* gap open penalty; per the change log the first gap costs gap_open + gap_ext */
    int gap_ext;    /* gap extension penalty */
    int gap_end;    /* gap end penalty */

    int *matrix;    /* score matrix, e.g. one of the aln_sm_* tables */
    int row;        /* NOTE(review): presumably the number of rows (symbols) in matrix -- confirm */
    int band_width; /* NOTE(review): presumably the width of the DP band -- confirm */
} AlnParam;
|
|
96
|
|
/*
 * One step of an alignment path.
 */
typedef struct {
    int i, j;            /* NOTE(review): presumably positions in the first and second sequence -- confirm */
    unsigned char ctype; /* step type; NOTE(review): presumably one of the FROM_* codes -- confirm */
} path_t;
|
|
102
|
|
103 typedef struct
|
|
104 {
|
|
105 path_t *path; /* for advanced users... :-) */
|
|
106 int path_len; /* for advanced users... :-) */
|
|
107 int start1, end1; /* start and end of the first sequence, coordinations are 1-based */
|
|
108 int start2, end2; /* start and end of the second sequence, coordinations are 1-based */
|
|
109 int score, subo; /* score */
|
|
110
|
|
111 char *out1, *out2; /* print them, and then you will know */
|
|
112 char *outm;
|
|
113
|
|
114 int n_cigar;
|
|
115 uint32_t *cigar32;
|
|
116 } AlnAln;
|
|
117
|
|
118 #ifdef __cplusplus
|
|
119 extern "C" {
|
|
120 #endif
|
|
121
|
|
122 AlnAln *aln_stdaln_aux(const char *seq1, const char *seq2, const AlnParam *ap,
|
|
123 int type, int do_align, int len1, int len2);
|
|
124 AlnAln *aln_stdaln(const char *seq1, const char *seq2, const AlnParam *ap, int type, int do_align);
|
|
125 void aln_free_AlnAln(AlnAln *aa);
|
|
126
|
|
127 int aln_global_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
|
128 path_t *path, int *path_len);
|
|
129 int aln_local_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
|
130 path_t *path, int *path_len, int _thres, int *_subo);
|
|
131 int aln_extend_core(unsigned char *seq1, int len1, unsigned char *seq2, int len2, const AlnParam *ap,
|
|
132 path_t *path, int *path_len, int G0, uint8_t *_mem);
|
|
133 uint16_t *aln_path2cigar(const path_t *path, int path_len, int *n_cigar);
|
|
134 uint32_t *aln_path2cigar32(const path_t *path, int path_len, int *n_cigar);
|
|
135
|
|
136 #ifdef __cplusplus
|
|
137 }
|
|
138 #endif
|
|
139
|
|
140 /********************
|
|
141 * global variables *
|
|
142 ********************/
|
|
143
|
|
144 extern AlnParam aln_param_bwa; /* = { 37, 9, 0, aln_sm_maq, 5, 50 }; */
|
|
145 extern AlnParam aln_param_blast; /* = { 5, 2, 2, aln_sm_blast, 5, 50 }; */
|
|
146 extern AlnParam aln_param_nt2nt; /* = { 10, 2, 2, aln_sm_nt, 16, 75 }; */
|
|
147 extern AlnParam aln_param_aa2aa; /* = { 20, 19, 19, aln_sm_read, 16, 75 }; */
|
|
148 extern AlnParam aln_param_rd2rd; /* = { 12, 2, 2, aln_sm_blosum62, 22, 50 }; */
|
|
149
|
|
/*
 * Score matrices; only declared here (incomplete array types), defined
 * elsewhere.
 */

/* common nucleotide score matrix for 16 bases */
extern int aln_sm_nt[];

/* BLOSUM62 and BLOSUM45 */
extern int aln_sm_blosum62[];
extern int aln_sm_blosum45[];

/* score matrix for reads, 16 bases; note that read alignment is quite
 * different from common nucleotide alignment */
extern int aln_sm_read[];

/* human-mouse score matrix for 4 bases */
extern int aln_sm_hs[];
|
|
161
|
|
162 #endif
|