Mercurial > repos > youngkim > ezbamqc
comparison ezBAMQC/src/htslib/cram/open_trace_file.c @ 0:dfa3745e5fd8
Uploaded
author | youngkim |
---|---|
date | Thu, 24 Mar 2016 17:12:52 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:dfa3745e5fd8 |
---|---|
1 /* | |
2 Author: James Bonfield | |
3 | |
4 Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL | |
5 All rights reserved | |
6 | |
7 Redistribution and use in source and binary forms, with or without | |
8 modification, are permitted provided that the following conditions are met: | |
9 | |
10 1. Redistributions of source code must retain the above copyright notice, | |
11 this list of conditions and the following disclaimer. | |
12 | |
13 2. Redistributions in binary form must reproduce the above copyright notice, | |
14 this list of conditions and the following disclaimer in the documentation | |
15 and/or other materials provided with the distribution. | |
16 | |
17 3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF | |
18 MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or | |
19 promote products derived from this software without specific prior written | |
20 permission. | |
21 | |
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND | |
23 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
24 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
25 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR | |
26 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES | |
27 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; | |
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON | |
29 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
32 */ | |
33 | |
34 /* | |
35 Copyright (c) 2008, 2009, 2013, 2014 Genome Research Ltd. | |
36 Author: James Bonfield <jkb@sanger.ac.uk> | |
37 | |
38 Redistribution and use in source and binary forms, with or without | |
39 modification, are permitted provided that the following conditions are met: | |
40 | |
41 1. Redistributions of source code must retain the above copyright notice, | |
42 this list of conditions and the following disclaimer. | |
43 | |
44 2. Redistributions in binary form must reproduce the above copyright notice, | |
45 this list of conditions and the following disclaimer in the documentation | |
46 and/or other materials provided with the distribution. | |
47 | |
48 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger | |
49 Institute nor the names of its contributors may be used to endorse or promote | |
50 products derived from this software without specific prior written permission. | |
51 | |
52 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND | |
53 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
54 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
55 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE | |
56 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
57 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
58 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
59 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
60 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
61 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
62 */ | |
63 | |
64 #include <stdlib.h> | |
65 #include <stdio.h> | |
66 #include <string.h> | |
67 #include <unistd.h> | |
68 #include <ctype.h> | |
69 #include <limits.h> | |
70 #include <sys/types.h> | |
71 #include <sys/stat.h> | |
72 #include "cram/os.h" | |
73 #ifndef PATH_MAX | |
74 # define PATH_MAX 1024 | |
75 #endif | |
76 | |
77 #include "cram/open_trace_file.h" | |
78 #include "cram/misc.h" | |
79 #include "htslib/hfile.h" | |
80 | |
/*
 * Tokenises the search path, splitting on colons (unix) or semicolons
 * (windows).  A "./" element is always appended to the end of the result.
 *
 * A doubled colon "::" is an escape for a literal ':'.  When the separator
 * is ':' an element starting with http:, ftp:, |http:, |ftp:, URL=http: or
 * URL=ftp: has its scheme and host[:port] part copied verbatim, so the
 * colons inside the URL do not split the element.
 *
 * Empty components (leading, repeated or trailing separators) are skipped,
 * so they can never terminate the list early.
 *
 * Returns: A new search path with items separated by nul chars. Two nul
 *          chars in a row represent the end of the tokenised path.
 *          Returns NULL for a failure.
 *
 * The returned data has been malloced. It is up to the caller to free this
 * memory.
 */
char *tokenise_search_path(char *searchpath) {
    static const char *const url_prefixes[] = {
        "http:", "ftp:", "|http:", "|ftp:", "URL=http:", "URL=ftp:"
    };
    const size_t n_prefixes = sizeof(url_prefixes) / sizeof(url_prefixes[0]);
    char *newsearch;
    size_t i, j, len;
#ifdef _WIN32
    const char path_sep = ';';
#else
    const char path_sep = ':';
#endif

    if (!searchpath)
        searchpath = "";
    len = strlen(searchpath);

    /*
     * Every byte written inside the loop consumes at least one input byte,
     * so at most len bytes are produced there.  The extra 5 bytes hold the
     * final element terminator plus "./\0\0".
     */
    newsearch = malloc(len + 5);
    if (!newsearch)
        return NULL;

    for (i = 0, j = 0; i < len; i++) {
        /* "::" => ":". Used for escaping colons in http://foo */
        if (i + 1 < len && searchpath[i] == ':' && searchpath[i+1] == ':') {
            newsearch[j++] = ':';
            i++;
            continue;
        }

        /* Handle http:// and ftp:// too without :: */
        if (path_sep == ':' && (i == 0 || searchpath[i-1] == ':')) {
            int is_url = 0;
            size_t k;

            for (k = 0; k < n_prefixes; k++) {
                if (!strncmp(&searchpath[i], url_prefixes[k],
                             strlen(url_prefixes[k]))) {
                    is_url = 1;
                    break;
                }
            }

            if (is_url) {
                /* Scheme, up to and including its ':' */
                do {
                    newsearch[j++] = searchpath[i];
                } while (i < len && searchpath[i++] != ':');

                /*
                 * From here i <= len, so searchpath[i] is at worst the
                 * terminating nul and is always safe to read.
                 */
                if (searchpath[i] == ':')
                    i++;                /* scheme written as "http::" */
                if (searchpath[i] == '/')
                    newsearch[j++] = searchpath[i++];
                if (searchpath[i] == '/')
                    newsearch[j++] = searchpath[i++];

                /* Look for host:port */
                if (i < len) {
                    do {
                        newsearch[j++] = searchpath[i++];
                    } while (i < len &&
                             searchpath[i] != ':' && searchpath[i] != '/');
                }

                /* Keep the ':' or '/' that ended the host name */
                if (i < len) {
                    newsearch[j++] = searchpath[i++];
                    if (searchpath[i] == ':')
                        i++;
                }

                /*
                 * The URL ran to the end of the input.  Stop here rather
                 * than copying the input's own nul (and whatever follows
                 * it) into the output.
                 */
                if (i >= len)
                    break;
            }
        }

        if (searchpath[i] == path_sep) {
            /* Skip blank path components */
            if (j && newsearch[j-1] != 0)
                newsearch[j++] = 0;
        } else {
            newsearch[j++] = searchpath[i];
        }
    }

    /* Terminate the last element unless a separator already did so */
    if (j && newsearch[j-1] != 0)
        newsearch[j++] = 0;
    newsearch[j++] = '.';
    newsearch[j++] = '/';
    newsearch[j++] = 0;
    newsearch[j++] = 0;

    return newsearch;
}
164 | |
165 mFILE *find_file_url(char *file, char *url) { | |
166 char buf[8192], *cp; | |
167 mFILE *mf = NULL; | |
168 int maxlen = 8190 - strlen(file), len; | |
169 hFILE *hf; | |
170 | |
171 /* Expand %s for the trace name */ | |
172 for (cp = buf; *url && cp - buf < maxlen; url++) { | |
173 if (*url == '%' && *(url+1) == 's') { | |
174 url++; | |
175 cp += strlen(strcpy(cp, file)); | |
176 } else { | |
177 *cp++ = *url; | |
178 } | |
179 } | |
180 *cp++ = 0; | |
181 | |
182 if (!(hf = hopen(buf, "r"))) | |
183 return NULL; | |
184 | |
185 if (NULL == (mf = mfcreate(NULL, 0))) | |
186 return NULL; | |
187 while ((len = hread(hf, buf, 8192)) > 0) { | |
188 if (mfwrite(buf, len, 1, mf) <= 0) { | |
189 hclose_abruptly(hf); | |
190 mfdestroy(mf); | |
191 return NULL; | |
192 } | |
193 } | |
194 if (hclose(hf) < 0) { | |
195 mfdestroy(mf); | |
196 return NULL; | |
197 } | |
198 | |
199 mrewind(mf); | |
200 return mf; | |
201 } | |
202 | |
203 /* | |
204 * Searches for file in the directory 'dirname'. If it finds it, it opens | |
205 * it. This also searches for compressed versions of the file in dirname | |
206 * too. | |
207 * | |
208 * Returns mFILE pointer if found | |
209 * NULL if not | |
210 */ | |
211 static mFILE *find_file_dir(char *file, char *dirname) { | |
212 char path[PATH_MAX+1]; | |
213 size_t len = strlen(dirname); | |
214 char *cp; | |
215 | |
216 if (dirname[len-1] == '/') | |
217 len--; | |
218 | |
219 /* Special case for "./" or absolute filenames */ | |
220 if (*file == '/' || (len==1 && *dirname == '.')) { | |
221 sprintf(path, "%s", file); | |
222 } else { | |
223 /* Handle %[0-9]*s expansions, if required */ | |
224 char *path_end = path; | |
225 *path = 0; | |
226 while ((cp = strchr(dirname, '%'))) { | |
227 char *endp; | |
228 long l = strtol(cp+1, &endp, 10); | |
229 if (*endp != 's') { | |
230 strncpy(path_end, dirname, (endp+1)-dirname); | |
231 path_end += (endp+1)-dirname; | |
232 dirname = endp+1; | |
233 continue; | |
234 } | |
235 | |
236 strncpy(path_end, dirname, cp-dirname); | |
237 path_end += cp-dirname; | |
238 if (l) { | |
239 strncpy(path_end, file, l); | |
240 path_end += MIN(strlen(file), l); | |
241 file += MIN(strlen(file), l); | |
242 } else { | |
243 strcpy(path_end, file); | |
244 path_end += strlen(file); | |
245 file += strlen(file); | |
246 } | |
247 len -= (endp+1) - dirname; | |
248 dirname = endp+1; | |
249 } | |
250 strncpy(path_end, dirname, len); | |
251 path_end += MIN(strlen(dirname), len); | |
252 *path_end = 0; | |
253 if (*file) { | |
254 *path_end++ = '/'; | |
255 strcpy(path_end, file); | |
256 } | |
257 | |
258 //fprintf(stderr, "*PATH=\"%s\"\n", path); | |
259 } | |
260 | |
261 if (is_file(path)) { | |
262 return mfopen(path, "rb"); | |
263 } | |
264 | |
265 return NULL; | |
266 } | |
267 | |
268 /* | |
269 * ------------------------------------------------------------------------ | |
270 * Public functions below. | |
271 */ | |
272 | |
273 /* | |
274 * Opens a trace file named 'file'. This is initially looked for as a | |
275 * pathname relative to a file named "relative_to". This may (for | |
276 * example) be the name of an experiment file referencing the trace | |
277 * file. In this case by passing relative_to as the experiment file | |
278 * filename the trace file will be picked up in the same directory as | |
279 * the experiment file. Relative_to may be supplied as NULL. | |
280 * | |
281 * 'file' is looked for at relative_to, then the current directory, and then | |
282 * all of the locations listed in 'path' (which is a colon separated list). | |
283 * If 'path' is NULL it uses the RAWDATA environment variable instead. | |
284 * | |
285 * Returns a mFILE pointer when found. | |
286 * NULL otherwise. | |
287 */ | |
288 mFILE *open_path_mfile(char *file, char *path, char *relative_to) { | |
289 char *newsearch; | |
290 char *ele; | |
291 mFILE *fp; | |
292 | |
293 /* Use path first */ | |
294 if (!path) | |
295 path = getenv("RAWDATA"); | |
296 if (NULL == (newsearch = tokenise_search_path(path))) | |
297 return NULL; | |
298 | |
299 /* | |
300 * Step through the search path testing out each component. | |
301 * We now look through each path element treating some prefixes as | |
302 * special, otherwise we treat the element as a directory. | |
303 */ | |
304 for (ele = newsearch; *ele; ele += strlen(ele)+1) { | |
305 char *ele2; | |
306 | |
307 /* | |
308 * '|' prefixing a path component indicates that we do not | |
309 * wish to perform the compression extension searching in that | |
310 * location. | |
311 * | |
312 * NB: this has been removed from the htslib implementation. | |
313 */ | |
314 if (*ele == '|') { | |
315 ele2 = ele+1; | |
316 } else { | |
317 ele2 = ele; | |
318 } | |
319 | |
320 if (0 == strncmp(ele2, "URL=", 4)) { | |
321 if ((fp = find_file_url(file, ele2+4))) { | |
322 free(newsearch); | |
323 return fp; | |
324 } | |
325 } else if (!strncmp(ele2, "http:", 5) || | |
326 !strncmp(ele2, "ftp:", 4)) { | |
327 if ((fp = find_file_url(file, ele2))) { | |
328 free(newsearch); | |
329 return fp; | |
330 } | |
331 } else if ((fp = find_file_dir(file, ele2))) { | |
332 free(newsearch); | |
333 return fp; | |
334 } | |
335 } | |
336 | |
337 free(newsearch); | |
338 | |
339 /* Look in the same location as the incoming 'relative_to' filename */ | |
340 if (relative_to) { | |
341 char *cp; | |
342 char relative_path[PATH_MAX+1]; | |
343 strcpy(relative_path, relative_to); | |
344 if ((cp = strrchr(relative_path, '/'))) | |
345 *cp = 0; | |
346 if ((fp = find_file_dir(file, relative_path))) | |
347 return fp; | |
348 } | |
349 | |
350 return NULL; | |
351 } |