0
|
1 /*
|
|
2 Author: James Bonfield
|
|
3
|
|
4 Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL
|
|
5 All rights reserved
|
|
6
|
|
7 Redistribution and use in source and binary forms, with or without
|
|
8 modification, are permitted provided that the following conditions are met:
|
|
9
|
|
10 1. Redistributions of source code must retain the above copyright notice,
|
|
11 this list of conditions and the following disclaimer.
|
|
12
|
|
13 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
14 this list of conditions and the following disclaimer in the documentation
|
|
15 and/or other materials provided with the distribution.
|
|
16
|
|
17 3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF
|
|
18 MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or
|
|
19 promote products derived from this software without specific prior written
|
|
20 permission.
|
|
21
|
|
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
23 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
24 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
25 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
26 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
27 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
29 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
32 */
|
|
33
|
|
34 /*
|
|
35 Copyright (c) 2008, 2009, 2013, 2014 Genome Research Ltd.
|
|
36 Author: James Bonfield <jkb@sanger.ac.uk>
|
|
37
|
|
38 Redistribution and use in source and binary forms, with or without
|
|
39 modification, are permitted provided that the following conditions are met:
|
|
40
|
|
41 1. Redistributions of source code must retain the above copyright notice,
|
|
42 this list of conditions and the following disclaimer.
|
|
43
|
|
44 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
45 this list of conditions and the following disclaimer in the documentation
|
|
46 and/or other materials provided with the distribution.
|
|
47
|
|
48 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
|
|
49 Institute nor the names of its contributors may be used to endorse or promote
|
|
50 products derived from this software without specific prior written permission.
|
|
51
|
|
52 THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
|
|
53 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
54 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
55 DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
|
|
56 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
57 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
|
58 SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
59 CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
|
60 OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
61 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
62 */
|
|
63
|
|
64 #include <stdlib.h>
|
|
65 #include <stdio.h>
|
|
66 #include <string.h>
|
|
67 #include <unistd.h>
|
|
68 #include <ctype.h>
|
|
69 #include <limits.h>
|
|
70 #include <sys/types.h>
|
|
71 #include <sys/stat.h>
|
|
72 #include "cram/os.h"
|
|
73 #ifndef PATH_MAX
|
|
74 # define PATH_MAX 1024
|
|
75 #endif
|
|
76
|
|
77 #include "cram/open_trace_file.h"
|
|
78 #include "cram/misc.h"
|
|
79 #include "htslib/hfile.h"
|
|
80
|
|
/*
 * Tokenises the search path, splitting on colons (unix) or semicolons
 * (windows).
 * We also explicitly add a "./" to the end of the search path.
 *
 * "::" is collapsed to a single literal ':'.  When the separator is ':',
 * an element starting with http:, ftp:, |http:, |ftp:, URL=http: or
 * URL=ftp: keeps the colon after the scheme and the first ':' or '/' that
 * follows the host name, so "http://host:port/dir" stays one element.
 *
 * Returns: A new search path with items separated by nul chars. Two nul
 *          chars in a row represent the end of the tokenised path.
 *          Returns NULL for a failure.
 *
 * NOTE: a trailing separator in searchpath produces an empty element in
 * front of the appended "./", so a reader that stops at the first empty
 * element will not reach "./".
 *
 * The returned data has been malloced. It is up to the caller to free this
 * memory.
 */
char *tokenise_search_path(char *searchpath) {
    char *newsearch;
    size_t i, j, len;
#ifdef _WIN32
    char path_sep = ';';
#else
    char path_sep = ':';
#endif

    if (!searchpath)
        searchpath = "";

    len = strlen(searchpath);

    /*
     * Every byte written inside the loop consumes at least one input byte,
     * so j <= len when the loop ends; the extra 5 bytes hold "\0./\0\0".
     */
    newsearch = malloc(len + 5);
    if (!newsearch)
        return NULL;

    for (i = 0, j = 0; i < len; i++) {
        /* "::" => ":". Used for escaping colons in http://foo */
        if (i + 1 < len && searchpath[i] == ':' && searchpath[i+1] == ':') {
            newsearch[j++] = ':';
            i++;
            continue;
        }

        /* Handle http:// and ftp:// too without :: */
        if (path_sep == ':' &&
            (i == 0 || searchpath[i-1] == ':') &&
            (!strncmp(&searchpath[i], "http:",     5) ||
             !strncmp(&searchpath[i], "ftp:",      4) ||
             !strncmp(&searchpath[i], "|http:",    6) ||
             !strncmp(&searchpath[i], "|ftp:",     5) ||
             !strncmp(&searchpath[i], "URL=http:", 9) ||
             !strncmp(&searchpath[i], "URL=ftp:",  8))) {

            /* Copy the prefix up to and including its first ':' */
            while (i < len) {
                char c = searchpath[i++];
                newsearch[j++] = c;
                if (c == ':')
                    break;
            }

            /* Accept the escaped spelling "http:://" as well */
            if (i < len && searchpath[i] == ':')
                i++;

            /* The (up to two) slashes that introduce the host */
            if (i < len && searchpath[i] == '/')
                newsearch[j++] = searchpath[i++];
            if (i < len && searchpath[i] == '/')
                newsearch[j++] = searchpath[i++];

            /* Look for host:port; the first byte is always taken */
            if (i < len) {
                do {
                    newsearch[j++] = searchpath[i++];
                } while (i < len &&
                         searchpath[i] != ':' && searchpath[i] != '/');
            }

            /* Keep the ':' or '/' that ended the host name */
            if (i < len)
                newsearch[j++] = searchpath[i++];
            if (i < len && searchpath[i] == ':')
                i++;

            /*
             * The URL ran to the end of the input: stop here rather than
             * reading, and copying, bytes beyond the terminator.
             */
            if (i >= len)
                break;
        }

        if (searchpath[i] == path_sep) {
            /* Skip blank path components */
            if (j && newsearch[j-1] != 0)
                newsearch[j++] = 0;
        } else {
            newsearch[j++] = searchpath[i];
        }
    }

    if (j)
        newsearch[j++] = 0;
    newsearch[j++] = '.';
    newsearch[j++] = '/';
    newsearch[j++] = 0;
    newsearch[j++] = 0;

    return newsearch;
}
|
|
164
|
|
165 mFILE *find_file_url(char *file, char *url) {
|
|
166 char buf[8192], *cp;
|
|
167 mFILE *mf = NULL;
|
|
168 int maxlen = 8190 - strlen(file), len;
|
|
169 hFILE *hf;
|
|
170
|
|
171 /* Expand %s for the trace name */
|
|
172 for (cp = buf; *url && cp - buf < maxlen; url++) {
|
|
173 if (*url == '%' && *(url+1) == 's') {
|
|
174 url++;
|
|
175 cp += strlen(strcpy(cp, file));
|
|
176 } else {
|
|
177 *cp++ = *url;
|
|
178 }
|
|
179 }
|
|
180 *cp++ = 0;
|
|
181
|
|
182 if (!(hf = hopen(buf, "r")))
|
|
183 return NULL;
|
|
184
|
|
185 if (NULL == (mf = mfcreate(NULL, 0)))
|
|
186 return NULL;
|
|
187 while ((len = hread(hf, buf, 8192)) > 0) {
|
|
188 if (mfwrite(buf, len, 1, mf) <= 0) {
|
|
189 hclose_abruptly(hf);
|
|
190 mfdestroy(mf);
|
|
191 return NULL;
|
|
192 }
|
|
193 }
|
|
194 if (hclose(hf) < 0) {
|
|
195 mfdestroy(mf);
|
|
196 return NULL;
|
|
197 }
|
|
198
|
|
199 mrewind(mf);
|
|
200 return mf;
|
|
201 }
|
|
202
|
|
203 /*
|
|
204 * Searches for file in the directory 'dirname'. If it finds it, it opens
|
|
205 * it. This also searches for compressed versions of the file in dirname
|
|
206 * too.
|
|
207 *
|
|
208 * Returns mFILE pointer if found
|
|
209 * NULL if not
|
|
210 */
|
|
211 static mFILE *find_file_dir(char *file, char *dirname) {
|
|
212 char path[PATH_MAX+1];
|
|
213 size_t len = strlen(dirname);
|
|
214 char *cp;
|
|
215
|
|
216 if (dirname[len-1] == '/')
|
|
217 len--;
|
|
218
|
|
219 /* Special case for "./" or absolute filenames */
|
|
220 if (*file == '/' || (len==1 && *dirname == '.')) {
|
|
221 sprintf(path, "%s", file);
|
|
222 } else {
|
|
223 /* Handle %[0-9]*s expansions, if required */
|
|
224 char *path_end = path;
|
|
225 *path = 0;
|
|
226 while ((cp = strchr(dirname, '%'))) {
|
|
227 char *endp;
|
|
228 long l = strtol(cp+1, &endp, 10);
|
|
229 if (*endp != 's') {
|
|
230 strncpy(path_end, dirname, (endp+1)-dirname);
|
|
231 path_end += (endp+1)-dirname;
|
|
232 dirname = endp+1;
|
|
233 continue;
|
|
234 }
|
|
235
|
|
236 strncpy(path_end, dirname, cp-dirname);
|
|
237 path_end += cp-dirname;
|
|
238 if (l) {
|
|
239 strncpy(path_end, file, l);
|
|
240 path_end += MIN(strlen(file), l);
|
|
241 file += MIN(strlen(file), l);
|
|
242 } else {
|
|
243 strcpy(path_end, file);
|
|
244 path_end += strlen(file);
|
|
245 file += strlen(file);
|
|
246 }
|
|
247 len -= (endp+1) - dirname;
|
|
248 dirname = endp+1;
|
|
249 }
|
|
250 strncpy(path_end, dirname, len);
|
|
251 path_end += MIN(strlen(dirname), len);
|
|
252 *path_end = 0;
|
|
253 if (*file) {
|
|
254 *path_end++ = '/';
|
|
255 strcpy(path_end, file);
|
|
256 }
|
|
257
|
|
258 //fprintf(stderr, "*PATH=\"%s\"\n", path);
|
|
259 }
|
|
260
|
|
261 if (is_file(path)) {
|
|
262 return mfopen(path, "rb");
|
|
263 }
|
|
264
|
|
265 return NULL;
|
|
266 }
|
|
267
|
|
268 /*
|
|
269 * ------------------------------------------------------------------------
|
|
270 * Public functions below.
|
|
271 */
|
|
272
|
|
273 /*
|
|
274 * Opens a trace file named 'file'. This is initially looked for as a
|
|
275 * pathname relative to a file named "relative_to". This may (for
|
|
276 * example) be the name of an experiment file referencing the trace
|
|
277 * file. In this case by passing relative_to as the experiment file
|
|
278 * filename the trace file will be picked up in the same directory as
|
|
279 * the experiment file. Relative_to may be supplied as NULL.
|
|
280 *
|
|
281 * 'file' is looked for at relative_to, then the current directory, and then
|
|
282 * all of the locations listed in 'path' (which is a colon separated list).
|
|
283 * If 'path' is NULL it uses the RAWDATA environment variable instead.
|
|
284 *
|
|
285 * Returns a mFILE pointer when found.
|
|
286 * NULL otherwise.
|
|
287 */
|
|
288 mFILE *open_path_mfile(char *file, char *path, char *relative_to) {
|
|
289 char *newsearch;
|
|
290 char *ele;
|
|
291 mFILE *fp;
|
|
292
|
|
293 /* Use path first */
|
|
294 if (!path)
|
|
295 path = getenv("RAWDATA");
|
|
296 if (NULL == (newsearch = tokenise_search_path(path)))
|
|
297 return NULL;
|
|
298
|
|
299 /*
|
|
300 * Step through the search path testing out each component.
|
|
301 * We now look through each path element treating some prefixes as
|
|
302 * special, otherwise we treat the element as a directory.
|
|
303 */
|
|
304 for (ele = newsearch; *ele; ele += strlen(ele)+1) {
|
|
305 char *ele2;
|
|
306
|
|
307 /*
|
|
308 * '|' prefixing a path component indicates that we do not
|
|
309 * wish to perform the compression extension searching in that
|
|
310 * location.
|
|
311 *
|
|
312 * NB: this has been removed from the htslib implementation.
|
|
313 */
|
|
314 if (*ele == '|') {
|
|
315 ele2 = ele+1;
|
|
316 } else {
|
|
317 ele2 = ele;
|
|
318 }
|
|
319
|
|
320 if (0 == strncmp(ele2, "URL=", 4)) {
|
|
321 if ((fp = find_file_url(file, ele2+4))) {
|
|
322 free(newsearch);
|
|
323 return fp;
|
|
324 }
|
|
325 } else if (!strncmp(ele2, "http:", 5) ||
|
|
326 !strncmp(ele2, "ftp:", 4)) {
|
|
327 if ((fp = find_file_url(file, ele2))) {
|
|
328 free(newsearch);
|
|
329 return fp;
|
|
330 }
|
|
331 } else if ((fp = find_file_dir(file, ele2))) {
|
|
332 free(newsearch);
|
|
333 return fp;
|
|
334 }
|
|
335 }
|
|
336
|
|
337 free(newsearch);
|
|
338
|
|
339 /* Look in the same location as the incoming 'relative_to' filename */
|
|
340 if (relative_to) {
|
|
341 char *cp;
|
|
342 char relative_path[PATH_MAX+1];
|
|
343 strcpy(relative_path, relative_to);
|
|
344 if ((cp = strrchr(relative_path, '/')))
|
|
345 *cp = 0;
|
|
346 if ((fp = find_file_dir(file, relative_path)))
|
|
347 return fp;
|
|
348 }
|
|
349
|
|
350 return NULL;
|
|
351 }
|