diff ezBAMQC/src/htslib/cram/zfio.c @ 0:dfa3745e5fd8

Uploaded
author youngkim
date Thu, 24 Mar 2016 17:12:52 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ezBAMQC/src/htslib/cram/zfio.c	Thu Mar 24 17:12:52 2016 -0400
@@ -0,0 +1,185 @@
+/*
+Copyright (c) 2009-2013 Genome Research Ltd.
+Author: James Bonfield <jkb@sanger.ac.uk>
+
+Redistribution and use in source and binary forms, with or without 
+modification, are permitted provided that the following conditions are met:
+
+   1. Redistributions of source code must retain the above copyright notice, 
+this list of conditions and the following disclaimer.
+
+   2. Redistributions in binary form must reproduce the above copyright notice, 
+this list of conditions and the following disclaimer in the documentation 
+and/or other materials provided with the distribution.
+
+   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
+Institute nor the names of its contributors may be used to endorse or promote
+products derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND 
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 
+DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "io_lib_config.h"
+#endif
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "cram/os.h"
+#include "cram/zfio.h"
+
+/* ------------------------------------------------------------------------ */
+/* Some wrappers around FILE * vs gzFile *, allowing for either */
+
+/*
+ * gzopen() works on both compressed and uncompressed data, but it has
+ * a significant performance hit even for uncompressed data (tested as
+ * 25s using FILE* to 46s via gzopen and 66s via gzopen when gzipped).
+ *
+ * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed
+ * and gzFile* when compressed. This also means we could hide bzopen in
+ * there too if desired.
+ */
+
+/*
+ * Reports the current position of the FILE* side of a zfp via ftello().
+ *
+ * Returns -1 when zf->fp is NULL; no position is reported for the gz
+ * handle.
+ */
+off_t zftello(zfp *zf) {
+    if (!zf->fp)
+	return -1;
+
+    return ftello(zf->fp);
+}
+
+/*
+ * Repositions the FILE* side of a zfp via fseeko(), passing 'offset' and
+ * 'whence' straight through.
+ *
+ * Returns the fseeko() result, or -1 when zf->fp is NULL; the gz handle
+ * is never repositioned here.
+ */
+int zfseeko(zfp *zf, off_t offset, int whence) {
+    if (!zf->fp)
+	return -1;
+
+    return fseeko(zf->fp, offset, whence);
+}
+
+
+/*
+ * Reads one line of at most 'size' bytes into 'line', dispatching to
+ * fgets() when the zfp holds a FILE* and to gzgets() otherwise.
+ *
+ * Returns whatever the selected reader returns.
+ */
+char *zfgets(char *line, int size, zfp *zf) {
+    return zf->fp ? fgets(line, size, zf->fp)
+		  : gzgets(zf->gz, line, size);
+}
+
+/*
+ * A wrapper for either fputs or gzputs depending on what has been
+ * opened.
+ *
+ * Returns the fputs() result when the zfp holds a FILE*.  For the gz
+ * handle it returns 0 on success and EOF on failure.
+ */
+int zfputs(char *line, zfp *zf) {
+    if (zf->fp)
+	return fputs(line, zf->fp);
+
+    /*
+     * gzputs() returns the number of characters written, or -1 on error,
+     * so only a negative result is a failure.  Testing the value for
+     * truth would treat -1 as success and an empty string as an error.
+     */
+    return gzputs(zf->gz, line) < 0 ? EOF : 0;
+}
+
+/*
+ * Looks at the next input character and pushes it straight back, so the
+ * read position is left unchanged (getc + ungetc, or the gz equivalents).
+ *
+ * Returns the character read, or EOF when the reader returned EOF (in
+ * which case nothing is pushed back).
+ */
+int zfpeek(zfp *zf) {
+    int next;
+
+    if (!zf->fp) {
+	/* No FILE*: peek through the gz handle instead */
+	next = gzgetc(zf->gz);
+	if (next != EOF)
+	    gzungetc(next, zf->gz);
+	return next;
+    }
+
+    next = getc(zf->fp);
+    if (next != EOF)
+	ungetc(next, zf->fp);
+
+    return next;
+}
+
+/*
+ * A replacement for either feof or gzeof: returns the end-of-file status
+ * of whichever handle the zfp holds.
+ */
+int zfeof(zfp *zf) {
+    if (zf->fp)
+	return feof(zf->fp);
+
+    return gzeof(zf->gz);
+}
+
+/*
+ * A replacement for either fopen or gzopen.
+ *
+ * Opens 'path' with 'mode' and returns a newly allocated zfp, or NULL on
+ * failure.  Unless 'mode' has a 'z' as its first or second character, the
+ * file is first opened with fopen() and its first two bytes are compared
+ * with the gzip magic number (0x1f 0x8b).  Anything that is not gzip,
+ * including a file too short to hold the magic, is rewound and returned
+ * as a plain FILE*.
+ *
+ * Gzipped data (or a 'z' mode) is opened through popen("gzip ...") when
+ * HAVE_POPEN is defined and through gzopen() otherwise; when reading,
+ * 'path' is tried first and then 'path' with ".gz" appended.
+ *
+ * The returned handle is released with zfclose().
+ */
+zfp *zfopen(const char *path, const char *mode) {
+    char path2[1024];
+    zfp *zf;
+    int len;
+
+    if (!(zf = (zfp *)malloc(sizeof(*zf))))
+	return NULL;
+    zf->fp = NULL;
+    zf->gz = NULL;
+
+    /* Try normal fopen; mode[1] is only inspected when mode is non-empty */
+    if (mode[0] != 'z' && (mode[0] == '\0' || mode[1] != 'z') &&
+	NULL != (zf->fp = fopen(path, mode))) {
+	unsigned char magic[2];
+	size_t got = fread(magic, 1, 2, zf->fp);
+
+	if (got != 2 && ferror(zf->fp)) {
+	    /* Genuine read error: close the stream instead of leaking it */
+	    fclose(zf->fp);
+	    free(zf);
+	    return NULL;
+	}
+
+	if (!(got == 2 &&
+	      magic[0] == 0x1f &&
+	      magic[1] == 0x8b)) {
+	    /*
+	     * Not gzip.  Rewind so the caller sees the bytes just consumed;
+	     * if the stream cannot be rewound those bytes would be lost, so
+	     * fail rather than hand back a handle that starts mid-file.
+	     */
+	    if (0 != fseeko(zf->fp, 0, SEEK_SET)) {
+		perror(path);
+		fclose(zf->fp);
+		free(zf);
+		return NULL;
+	    }
+	    return zf;
+	}
+
+	fclose(zf->fp);
+	zf->fp = NULL;
+    }
+
+#ifdef HAVE_POPEN
+    /*
+     * I've no idea why, but gzgets is VERY slow, maybe because it handles
+     * arbitrary seeks.
+     * popen to gzip -cd is 3 times faster though.
+     *
+     * NOTE(review): 'path' is pasted unquoted into a shell command line,
+     * so this branch is unsafe for untrusted paths.  Streams obtained from
+     * popen() should also be closed with pclose(), whereas zfclose() calls
+     * fclose() on zf->fp.
+     */
+    if (*mode == 'w') {
+	len = snprintf(path2, sizeof(path2), "gzip > %s", path);
+	if (len >= 0 && (size_t)len < sizeof(path2) &&
+	    NULL != (zf->fp = popen(path2, "w")))
+	    return zf;
+
+	printf("Failed on %s\n", path);
+    } else {
+	if (access(path, R_OK) == 0) {
+	    len = snprintf(path2, sizeof(path2), "gzip -cd < %s", path);
+	    if (len >= 0 && (size_t)len < sizeof(path2) &&
+		NULL != (zf->fp = popen(path2, "r")))
+		return zf;
+	}
+
+	len = snprintf(path2, sizeof(path2), "gzip -cd < %s.gz", path);
+	if (len >= 0 && (size_t)len < sizeof(path2) &&
+	    NULL != (zf->fp = popen(path2, "r")))
+	    return zf;
+
+	printf("Failed on %s\n", path);
+    }
+#else
+    /* Gzopen instead */
+    if ((zf->gz = gzopen(path, mode)))
+	return zf;
+
+    /* Skip the ".gz" retry if the name would not fit, rather than truncate */
+    len = snprintf(path2, sizeof(path2), "%s.gz", path);
+    if (len >= 0 && (size_t)len < sizeof(path2) &&
+	(zf->gz = gzopen(path2, mode)))
+	return zf;
+#endif
+
+    perror(path);
+
+    free(zf);
+    return NULL;
+}
+
+/*
+ * Closes whichever handle the zfp holds (fclose for the FILE*, gzclose
+ * for the gz handle) and then frees the zfp itself.
+ *
+ * Returns the result of the close call.  'zf' must not be used afterwards.
+ *
+ * NOTE(review): a FILE* obtained via popen() in zfopen's HAVE_POPEN branch
+ * would also be closed with fclose() here - confirm whether pclose() is
+ * needed if that branch is ever enabled.
+ */
+int zfclose(zfp *zf) {
+    int status;
+
+    if (zf->fp)
+	status = fclose(zf->fp);
+    else
+	status = gzclose(zf->gz);
+
+    free(zf);
+    return status;
+}