diff ezBAMQC/src/htslib/cram/open_trace_file.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ezBAMQC/src/htslib/cram/open_trace_file.c	Thu Mar 24 17:12:52 2016 -0400
@@ -0,0 +1,351 @@
+/*
+Author: James Bonfield
+
+Copyright (c) 2000-2001 MEDICAL RESEARCH COUNCIL
+All rights reserved
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the name of the MEDICAL RESEARCH COUNCIL, THE LABORATORY OF 
+MOLECULAR BIOLOGY nor the names of its contributors may be used to endorse or 
+promote products derived from this software without specific prior written 
+permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+Copyright (c) 2008, 2009, 2013, 2014 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include "cram/os.h"
+#ifndef PATH_MAX
+#  define PATH_MAX 1024
+#endif
+
+#include "cram/open_trace_file.h"
+#include "cram/misc.h"
+#include "htslib/hfile.h"
+
+/*
+ * Tokenises the search path splitting on colons (unix) or semicolons
+ * (windows).
+ * We also  explicitly add a "./" to the end of the search path
+ *
+ * Returns: A new search path with items separated by nul chars. Two nul
+ *          chars in a row represent the end of the tokenised path.
+ * Returns NULL for a failure.
+ *
+ * The returned data has been malloced. It is up to the caller to free this
+ * memory.
+ */
+char *tokenise_search_path(char *searchpath) {
+    char *newsearch;
+    unsigned int i, j;
+    size_t len;
+#ifdef _WIN32
+    char path_sep = ';';
+#else
+    char path_sep = ':';
+#endif
+
+    if (!searchpath)
+	searchpath="";
+
+    newsearch = (char *)malloc((len = strlen(searchpath))+5);
+    if (!newsearch)
+	return NULL;
+
+    for (i = 0, j = 0; i < len; i++) {
+	/* "::" => ":". Used for escaping colons in http://foo */
+	if (i < len-1 && searchpath[i] == ':' && searchpath[i+1] == ':') {
+	    newsearch[j++] = ':';
+	    i++;
+	    continue;
+	}
+
+	/* Handle http:// and ftp:// too without :: */
+	if (path_sep == ':') {
+	    if ((i == 0 || (i > 0 && searchpath[i-1] == ':')) &&
+		(!strncmp(&searchpath[i], "http:",     5) ||
+		 !strncmp(&searchpath[i], "ftp:",      4) ||
+		 !strncmp(&searchpath[i], "|http:",    6) ||
+		 !strncmp(&searchpath[i], "|ftp:",     5) ||
+		 !strncmp(&searchpath[i], "URL=http:", 9) ||
+		 !strncmp(&searchpath[i], "URL=ftp:",  8))) {
+		do {
+		    newsearch[j++] = searchpath[i];
+		} while (i<len && searchpath[i++] != ':');
+		if (searchpath[i] == ':')
+		    i++;
+		if (searchpath[i]=='/')
+		    newsearch[j++] = searchpath[i++];
+		if (searchpath[i]=='/')
+		    newsearch[j++] = searchpath[i++];
+		// Look for host:port
+		do {
+		    newsearch[j++] = searchpath[i++];
+		} while (i<len && searchpath[i] != ':' && searchpath[i] != '/');
+		newsearch[j++] = searchpath[i++];
+		if (searchpath[i] == ':')
+		    i++;
+	    }
+	}
+
+	if (searchpath[i] == path_sep) {
+	    /* Skip blank path components */
+	    if (j && newsearch[j-1] != 0)
+		newsearch[j++] = 0;
+	} else {
+	    newsearch[j++] = searchpath[i];
+	}
+    }
+
+    if (j)
+	newsearch[j++] = 0;
+    newsearch[j++] = '.';
+    newsearch[j++] = '/';
+    newsearch[j++] = 0;
+    newsearch[j++] = 0;
+    
+    return newsearch;
+}
+
+mFILE *find_file_url(char *file, char *url) {
+    char buf[8192], *cp;
+    mFILE *mf = NULL;
+    int maxlen = 8190 - strlen(file), len;
+    hFILE *hf;
+
+    /* Expand %s for the trace name */
+    for (cp = buf; *url && cp - buf < maxlen; url++) {
+	if (*url == '%' && *(url+1) == 's') {
+	    url++;
+	    cp += strlen(strcpy(cp, file));
+	} else {
+	    *cp++ = *url;
+	}
+    }
+    *cp++ = 0;
+
+    if (!(hf = hopen(buf, "r")))
+	return NULL;
+
+    if (NULL == (mf = mfcreate(NULL, 0)))
+	return NULL;
+    while ((len = hread(hf, buf, 8192)) > 0) {
+	if (mfwrite(buf, len, 1, mf) <= 0) {
+	    hclose_abruptly(hf);
+	    mfdestroy(mf);
+	    return NULL;
+	}
+    }
+    if (hclose(hf) < 0) {
+	mfdestroy(mf);
+	return NULL;
+    }
+
+    mrewind(mf);
+    return mf;
+}
+
+/*
+ * Searches for file in the directory 'dirname'. If it finds it, it opens
+ * it. This also searches for compressed versions of the file in dirname
+ * too.
+ *
+ * Returns mFILE pointer if found
+ *         NULL if not
+ */
+static mFILE *find_file_dir(char *file, char *dirname) {
+    char path[PATH_MAX+1];
+    size_t len = strlen(dirname);
+    char *cp;
+
+    if (dirname[len-1] == '/')
+	len--;
+
+    /* Special case for "./" or absolute filenames */
+    if (*file == '/' || (len==1 && *dirname == '.')) {
+	sprintf(path, "%s", file);
+    } else {
+	/* Handle %[0-9]*s expansions, if required */
+	char *path_end = path;
+	*path = 0;
+	while ((cp = strchr(dirname, '%'))) {
+	    char *endp;
+	    long l = strtol(cp+1, &endp, 10);
+	    if (*endp != 's') {
+		strncpy(path_end, dirname, (endp+1)-dirname);
+		path_end += (endp+1)-dirname;
+		dirname = endp+1;
+		continue;
+	    }
+	    
+	    strncpy(path_end, dirname, cp-dirname);
+	    path_end += cp-dirname;
+	    if (l) {
+		strncpy(path_end, file, l);
+		path_end += MIN(strlen(file), l);
+		file     += MIN(strlen(file), l);
+	    } else {
+		strcpy(path_end, file);
+		path_end += strlen(file);
+		file     += strlen(file);
+	    }
+	    len -= (endp+1) - dirname;
+	    dirname = endp+1;
+	}
+	strncpy(path_end, dirname, len);
+	path_end += MIN(strlen(dirname), len);
+	*path_end = 0;
+	if (*file) {
+	    *path_end++ = '/';
+	    strcpy(path_end, file);
+	}
+
+	//fprintf(stderr, "*PATH=\"%s\"\n", path);
+    }
+
+    if (is_file(path)) {
+	return mfopen(path, "rb");
+    }
+
+    return NULL;
+}
+
+/*
+ * ------------------------------------------------------------------------
+ * Public functions below.
+ */
+
+/*
+ * Opens a trace file named 'file'. This is initially looked for as a
+ * pathname relative to a file named "relative_to". This may (for
+ * example) be the name of an experiment file referencing the trace
+ * file. In this case by passing relative_to as the experiment file
+ * filename the trace file will be picked up in the same directory as
+ * the experiment file. Relative_to may be supplied as NULL.
+ *
+ * 'file' is looked for at relative_to, then the current directory, and then
+ * all of the locations listed in 'path' (which is a colon separated list).
+ * If 'path' is NULL it uses the RAWDATA environment variable instead.
+ *
+ * Returns a mFILE pointer when found.
+ *           NULL otherwise.
+ */
+mFILE *open_path_mfile(char *file, char *path, char *relative_to) {
+    char *newsearch;
+    char *ele;
+    mFILE *fp;
+
+    /* Use path first */
+    if (!path)
+	path = getenv("RAWDATA");
+    if (NULL == (newsearch = tokenise_search_path(path)))
+	return NULL;
+    
+    /*
+     * Step through the search path testing out each component.
+     * We now look through each path element treating some prefixes as
+     * special, otherwise we treat the element as a directory.
+     */
+    for (ele = newsearch; *ele; ele += strlen(ele)+1) {
+	char *ele2;
+
+	/*
+	 * '|' prefixing a path component indicates that we do not
+	 * wish to perform the compression extension searching in that
+	 * location.
+	 *
+	 * NB: this has been removed from the htslib implementation.
+	 */
+	if (*ele == '|') {
+	    ele2 = ele+1;
+	} else {
+	    ele2 = ele;
+	}
+
+	if (0 == strncmp(ele2, "URL=", 4)) {
+	    if ((fp = find_file_url(file, ele2+4))) {
+		free(newsearch);
+		return fp;
+	    }
+	} else if (!strncmp(ele2, "http:", 5) ||
+		   !strncmp(ele2, "ftp:", 4)) {
+	    if ((fp = find_file_url(file, ele2))) {
+		free(newsearch);
+		return fp;
+	    }
+	} else if ((fp = find_file_dir(file, ele2))) {
+	    free(newsearch);
+	    return fp;
+	} 
+   }
+
+    free(newsearch);
+
+    /* Look in the same location as the incoming 'relative_to' filename */
+    if (relative_to) {
+	char *cp;
+	char relative_path[PATH_MAX+1];
+	strcpy(relative_path, relative_to);
+	if ((cp = strrchr(relative_path, '/')))
+	    *cp = 0;
+	if ((fp = find_file_dir(file, relative_path)))
+	    return fp;
+    }
+
+    return NULL;
+}