annotate bwa-0.7.9a/QSufSort.c @ 0:ce5a8082bbb8 draft

Uploaded
author xilinxu
date Thu, 14 Aug 2014 02:16:48 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
1 /* QSufSort.c
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
2
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
3 Original source from qsufsort.c
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
4
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
5 Copyright 1999, N. Jesper Larsson, all rights reserved.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
6
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
7 This file contains an implementation of the algorithm presented in "Faster
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
8 Suffix Sorting" by N. Jesper Larsson (jesper@cs.lth.se) and Kunihiko
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
9 Sadakane (sada@is.s.u-tokyo.ac.jp).
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
10
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
11 This software may be used freely for any purpose. However, when distributed,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
12 the original source must be clearly stated, and, when the source code is
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
13 distributed, the copyright notice must be retained and any alterations in
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
14 the code must be clearly marked. No warranty is given regarding the quality
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
15 of this software.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
16
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
17 Modified by Wong Chi-Kwong, 2004
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
18
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
19 Changes summary: - Used long variable and function names
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
20 - Removed global variables
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
21 - Replace pointer references with array references
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
22 - Used insertion sort in place of selection sort and increased insertion sort threshold
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
23 - Reconstructing suffix array from inverse becomes an option
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
24 - Add handling where end-of-text symbol is not necessary < all characters
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
25 - Removed codes for supporting alphabet size > number of characters
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
26
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
27 No warrenty is given regarding the quality of the modifications.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
28
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
29 */
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
30
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
31
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
32 #include <stdio.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
33 #include <stdlib.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
34 #include <limits.h>
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
35 #include "QSufSort.h"
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
36
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
37 #define min(value1, value2) ( ((value1) < (value2)) ? (value1) : (value2) )
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
38 #define med3(a, b, c) ( a<b ? (b<c ? b : a<c ? c : a) : (b>c ? b : a>c ? c : a))
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
39 #define swap(a, b, t); t = a; a = b; b = t;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
40
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
41 // Static functions
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
42 static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
43 const qsint_t highestPos, const qsint_t numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
44 static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
45 const qsint_t highestPos, const qsint_t numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
46 static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
47 const qsint_t highestPos, const qsint_t numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
48 static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
49 static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
50 const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
51
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
52 /* Makes suffix array p of x. x becomes inverse of p. p and x are both of size
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
53 n+1. Contents of x[0...n-1] are integers in the range l...k-1. Original
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
54 contents of x[n] is disregarded, the n-th symbol being regarded as
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
55 end-of-string smaller than all other symbols.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
56 void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
57 const qsint_t smallestInputSymbol, const int skipTransform)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
58 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
59 qsint_t i, j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
60 qsint_t s, negatedSortedGroupLength;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
61 qsint_t numSymbolAggregated;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
62 qsint_t numSortedPos = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
63 qsint_t newAlphabetSize;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
64
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
65 if (!skipTransform) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
66 /* bucketing possible*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
67 newAlphabetSize = QSufSortTransform(V, I, numChar, largestInputSymbol, smallestInputSymbol,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
68 numChar, &numSymbolAggregated);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
69 QSufSortBucketSort(V, I, numChar, newAlphabetSize);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
70 I[0] = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
71 V[numChar] = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
72 numSortedPos = numSymbolAggregated;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
73 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
74
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
75 while ((qsint_t)(I[0]) >= -(qsint_t)numChar) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
76 i = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
77 negatedSortedGroupLength = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
78 do {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
79 s = I[i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
80 if (s < 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
81 i -= s; /* skip over sorted group.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
82 negatedSortedGroupLength += s;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
83 } else {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
84 if (negatedSortedGroupLength) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
85 I[i+negatedSortedGroupLength] = negatedSortedGroupLength; /* combine preceding sorted groups */
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
86 negatedSortedGroupLength = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
87 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
88 j = V[s] + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
89 QSufSortSortSplit(V, I, i, j - 1, numSortedPos);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
90 i = j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
91 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
92 } while (i <= numChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
93 if (negatedSortedGroupLength) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
94 /* array ends with a sorted group.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
95 I[i+negatedSortedGroupLength] = negatedSortedGroupLength; /* combine sorted groups at end of I.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
96 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
97 numSortedPos *= 2; /* double sorted-depth.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
98 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
99 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
100
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
101 void QSufSortGenerateSaFromInverse(const qsint_t* V, qsint_t* __restrict I, const qsint_t numChar)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
102 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
103 qsint_t i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
104 for (i=0; i<=numChar; i++)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
105 I[V[i]] = i + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
106 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
107
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
108 /* Sorting routine called for each unsorted group. Sorts the array of integers
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
109 (suffix numbers) of length n starting at p. The algorithm is a ternary-split
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
110 quicksort taken from Bentley & McIlroy, "Engineering a Sort Function",
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
111 Software -- Practice and Experience 23(11), 1249-1265 (November 1993). This
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
112 function is based on Program 7.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
113 static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
114 const qsint_t highestPos, const qsint_t numSortedChar) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
115
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
116 qsint_t a, b, c, d;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
117 qsint_t l, m;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
118 qsint_t f, v, s, t;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
119 qsint_t tmp;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
120 qsint_t numItem;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
121
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
122 numItem = highestPos - lowestPos + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
123
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
124 if (numItem <= INSERT_SORT_NUM_ITEM) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
125 QSufSortInsertSortSplit(V, I, lowestPos, highestPos, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
126 return;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
127 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
128
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
129 v = QSufSortChoosePivot(V, I, lowestPos, highestPos, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
130
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
131 a = b = lowestPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
132 c = d = highestPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
133
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
134 while (1) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
135 while (c >= b && (f = KEY(V, I, b, numSortedChar)) <= v) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
136 if (f == v) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
137 swap(I[a], I[b], tmp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
138 a++;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
139 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
140 b++;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
141 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
142 while (c >= b && (f = KEY(V, I, c, numSortedChar)) >= v) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
143 if (f == v) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
144 swap(I[c], I[d], tmp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
145 d--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
146 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
147 c--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
148 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
149 if (b > c)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
150 break;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
151 swap(I[b], I[c], tmp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
152 b++;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
153 c--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
154 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
155
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
156 s = a - lowestPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
157 t = b - a;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
158 s = min(s, t);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
159 for (l = lowestPos, m = b - s; m < b; l++, m++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
160 swap(I[l], I[m], tmp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
161 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
162
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
163 s = d - c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
164 t = highestPos - d;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
165 s = min(s, t);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
166 for (l = b, m = highestPos - s + 1; m <= highestPos; l++, m++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
167 swap(I[l], I[m], tmp);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
168 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
169
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
170 s = b - a;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
171 t = d - c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
172 if (s > 0)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
173 QSufSortSortSplit(V, I, lowestPos, lowestPos + s - 1, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
174
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
175 // Update group number for equal portion
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
176 a = lowestPos + s;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
177 b = highestPos - t;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
178 if (a == b) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
179 // Sorted group
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
180 V[I[a]] = a;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
181 I[a] = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
182 } else {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
183 // Unsorted group
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
184 for (c=a; c<=b; c++)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
185 V[I[c]] = b;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
186 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
187
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
188 if (t > 0)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
189 QSufSortSortSplit(V, I, highestPos - t + 1, highestPos, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
190
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
191 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
192
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
193 /* Algorithm by Bentley & McIlroy.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
194 static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
195 const qsint_t highestPos, const qsint_t numSortedChar) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
196
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
197 qsint_t m;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
198 qsint_t keyl, keym, keyn;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
199 qsint_t key1, key2, key3;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
200 qsint_t s;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
201 qsint_t numItem;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
202
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
203 numItem = highestPos - lowestPos + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
204
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
205 m = lowestPos + numItem / 2;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
206
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
207 s = numItem / 8;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
208 key1 = KEY(V, I, lowestPos, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
209 key2 = KEY(V, I, lowestPos+s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
210 key3 = KEY(V, I, lowestPos+2*s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
211 keyl = med3(key1, key2, key3);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
212 key1 = KEY(V, I, m-s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
213 key2 = KEY(V, I, m, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
214 key3 = KEY(V, I, m+s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
215 keym = med3(key1, key2, key3);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
216 key1 = KEY(V, I, highestPos-2*s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
217 key2 = KEY(V, I, highestPos-s, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
218 key3 = KEY(V, I, highestPos, numSortedChar);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
219 keyn = med3(key1, key2, key3);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
220
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
221 return med3(keyl, keym, keyn);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
222
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
223
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
224 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
225
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
226 /* Quadratic sorting method to use for small subarrays. */
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
227 static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
228 const qsint_t highestPos, const qsint_t numSortedChar)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
229 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
230 qsint_t i, j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
231 qsint_t tmpKey, tmpPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
232 qsint_t numItem;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
233 qsint_t key[INSERT_SORT_NUM_ITEM], pos[INSERT_SORT_NUM_ITEM];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
234 qsint_t negativeSortedLength;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
235 qsint_t groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
236
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
237 numItem = highestPos - lowestPos + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
238
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
239 for (i=0; i<numItem; i++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
240 pos[i] = I[lowestPos + i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
241 key[i] = V[pos[i] + numSortedChar];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
242 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
243
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
244 for (i=1; i<numItem; i++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
245 tmpKey = key[i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
246 tmpPos = pos[i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
247 for (j=i; j>0 && key[j-1] > tmpKey; j--) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
248 key[j] = key[j-1];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
249 pos[j] = pos[j-1];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
250 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
251 key[j] = tmpKey;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
252 pos[j] = tmpPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
253 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
254
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
255 negativeSortedLength = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
256
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
257 i = numItem - 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
258 groupNum = highestPos;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
259 while (i > 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
260 I[i+lowestPos] = pos[i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
261 V[I[i+lowestPos]] = groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
262 if (key[i-1] == key[i]) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
263 negativeSortedLength = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
264 } else {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
265 if (negativeSortedLength < 0)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
266 I[i+lowestPos] = negativeSortedLength;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
267 groupNum = i + lowestPos - 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
268 negativeSortedLength--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
269 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
270 i--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
271 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
272
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
273 I[lowestPos] = pos[0];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
274 V[I[lowestPos]] = groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
275 if (negativeSortedLength < 0)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
276 I[lowestPos] = negativeSortedLength;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
277 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
278
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
279 /* Bucketsort for first iteration.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
280
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
281 Input: x[0...n-1] holds integers in the range 1...k-1, all of which appear
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
282 at least once. x[n] is 0. (This is the corresponding output of transform.) k
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
283 must be at most n+1. p is array of size n+1 whose contents are disregarded.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
284
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
285 Output: x is V and p is I after the initial sorting stage of the refined
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
286 suffix sorting algorithm.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
287
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
288 static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
289 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
290 qsint_t i, c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
291 qsint_t d;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
292 qsint_t groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
293 qsint_t currentIndex;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
294
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
295 // mark linked list empty
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
296 for (i=0; i<alphabetSize; i++)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
297 I[i] = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
298
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
299 // insert to linked list
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
300 for (i=0; i<=numChar; i++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
301 c = V[i];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
302 V[i] = (qsint_t)(I[c]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
303 I[c] = i;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
304 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
305
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
306 currentIndex = numChar;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
307 for (i=alphabetSize; i>0; i--) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
308 c = I[i-1];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
309 d = (qsint_t)(V[c]);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
310 groupNum = currentIndex;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
311 V[c] = groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
312 if (d >= 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
313 I[currentIndex] = c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
314 while (d >= 0) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
315 c = d;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
316 d = V[c];
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
317 V[c] = groupNum;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
318 currentIndex--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
319 I[currentIndex] = c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
320 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
321 } else {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
322 // sorted group
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
323 I[currentIndex] = -1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
324 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
325 currentIndex--;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
326 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
327 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
328
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
329 /* Transforms the alphabet of x by attempting to aggregate several symbols into
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
330 one, while preserving the suffix order of x. The alphabet may also be
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
331 compacted, so that x on output comprises all integers of the new alphabet
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
332 with no skipped numbers.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
333
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
334 Input: x is an array of size n+1 whose first n elements are positive
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
335 integers in the range l...k-1. p is array of size n+1, used for temporary
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
336 storage. q controls aggregation and compaction by defining the maximum intue
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
337 for any symbol during transformation: q must be at least k-l; if q<=n,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
338 compaction is guaranteed; if k-l>n, compaction is never done; if q is
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
339 INT_MAX, the maximum number of symbols are aggregated into one.
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
340
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
341 Output: Returns an integer j in the range 1...q representing the size of the
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
342 new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
343 set to the number of old symbols grouped into one. Only x[n] is 0.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
344 static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
345 const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
346 {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
347 qsint_t c, i, j;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
348 qsint_t a; // numSymbolAggregated
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
349 qsint_t mask;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
350 qsint_t minSymbolInChunk = 0, maxSymbolInChunk = 0;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
351 qsint_t newAlphabetSize;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
352 qsint_t maxNumInputSymbol, maxNumBit, maxSymbol;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
353
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
354 maxNumInputSymbol = largestInputSymbol - smallestInputSymbol + 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
355
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
356 for (maxNumBit = 0, i = maxNumInputSymbol; i; i >>= 1) ++maxNumBit;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
357 maxSymbol = QSINT_MAX >> maxNumBit;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
358
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
359 c = maxNumInputSymbol;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
360 for (a = 0; a < numChar && maxSymbolInChunk <= maxSymbol && c <= maxNewAlphabetSize; a++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
361 minSymbolInChunk = (minSymbolInChunk << maxNumBit) | (V[a] - smallestInputSymbol + 1);
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
362 maxSymbolInChunk = c;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
363 c = (maxSymbolInChunk << maxNumBit) | maxNumInputSymbol;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
364 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
365
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
366 mask = (1 << (a-1) * maxNumBit) - 1; /* mask masks off top old symbol from chunk.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
367 V[numChar] = smallestInputSymbol - 1; /* emulate zero terminator.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
368
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
369 /* bucketing possible, compact alphabet.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
370 for (i=0; i<=maxSymbolInChunk; i++)
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
371 I[i] = 0; /* zero transformation table.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
372 c = minSymbolInChunk;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
373 for (i=a; i<=numChar; i++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
374 I[c] = 1; /* mark used chunk symbol.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
375 c = ((c & mask) << maxNumBit) | (V[i] - smallestInputSymbol + 1); /* shift in next old symbol in chunk.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
376 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
377 for (i=1; i<a; i++) { /* handle last r-1 positions.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
378 I[c] = 1; /* mark used chunk symbol.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
379 c = (c & mask) << maxNumBit; /* shift in next old symbol in chunk.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
380 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
381 newAlphabetSize = 1;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
382 for (i=0; i<=maxSymbolInChunk; i++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
383 if (I[i]) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
384 I[i] = newAlphabetSize;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
385 newAlphabetSize++;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
386 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
387 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
388 c = minSymbolInChunk;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
389 for (i=0, j=a; j<=numChar; i++, j++) {
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
390 V[i] = I[c]; /* transform to new alphabet.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
391 c = ((c & mask) << maxNumBit) | (V[j] - smallestInputSymbol + 1); /* shift in next old symbol in chunk.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
392 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
393 for (; i<numChar; i++) { /* handle last a-1 positions.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
394 V[i] = I[c]; /* transform to new alphabet.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
395 c = (c & mask) << maxNumBit; /* shift right-end zero in chunk.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
396 }
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
397
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
398 V[numChar] = 0; /* end-of-string symbol is zero.*/
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
399
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
400 *numSymbolAggregated = a;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
401 return newAlphabetSize;
ce5a8082bbb8 Uploaded
xilinxu
parents:
diff changeset
402 }