diff --git a/Makefile b/Makefile index bbba897f3..285580633 100644 --- a/Makefile +++ b/Makefile @@ -176,8 +176,7 @@ LIBHTS_OBJS = \ cram/rANS_static.o \ cram/sam_header.o \ cram/string_alloc.o \ - cram/vlen.o \ - cram/zfio.o + cram/vlen.o PLUGIN_EXT = PLUGIN_OBJS = @@ -320,7 +319,7 @@ cram/cram_codecs.o cram/cram_codecs.pico: cram/cram_codecs.c config.h $(cram_h) cram/cram_decode.o cram/cram_decode.pico: cram/cram_decode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) cram/cram_encode.o cram/cram_encode.pico: cram/cram_encode.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(htslib_hts_endian_h) cram/cram_external.o cram/cram_external.pico: cram/cram_external.c config.h $(htslib_hfile_h) $(cram_h) -cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) cram/zfio.h +cram/cram_index.o cram/cram_index.pico: cram/cram_index.c config.h $(htslib_bgzf_h) $(htslib_hfile_h) $(hts_internal_h) $(cram_h) $(cram_os_h) cram/cram_io.o cram/cram_io.pico: cram/cram_io.c config.h $(cram_h) $(cram_os_h) $(htslib_hts_h) $(cram_open_trace_file_h) cram/rANS_static.h $(htslib_hfile_h) $(htslib_bgzf_h) $(htslib_faidx_h) $(hts_internal_h) cram/cram_samtools.o cram/cram_samtools.pico: cram/cram_samtools.c config.h $(cram_h) $(htslib_sam_h) cram/cram_stats.o cram/cram_stats.pico: cram/cram_stats.c config.h $(cram_h) $(cram_os_h) @@ -333,7 +332,6 @@ cram/sam_header.o cram/sam_header.pico: cram/sam_header.c config.h $(cram_sam_he cram/string_alloc.o cram/string_alloc.pico: cram/string_alloc.c config.h cram/string_alloc.h thread_pool.o thread_pool.pico: thread_pool.c config.h $(thread_pool_internal_h) cram/vlen.o cram/vlen.pico: cram/vlen.c config.h cram/vlen.h $(cram_os_h) -cram/zfio.o cram/zfio.pico: cram/zfio.c config.h $(cram_os_h) cram/zfio.h bgzip: bgzip.o libhts.a diff --git a/cram/cram_index.c b/cram/cram_index.c index d84712911..53f178871 100644 --- a/cram/cram_index.c +++ b/cram/cram_index.c @@ -60,11 +60,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include "htslib/bgzf.h" #include "htslib/hfile.h" #include "hts_internal.h" #include "cram/cram.h" #include "cram/os.h" -#include "cram/zfio.h" #if 0 static void dump_index_(cram_index *e, int level) { @@ -450,12 +450,13 @@ int cram_seek_to_refpos(cram_fd *fd, cram_range *r) { * decode the slice to look at the RI data series instead. * * Returns 0 on success - * -1 on failure + * -1 on read failure + * -4 on write failure */ static int cram_index_build_multiref(cram_fd *fd, cram_container *c, cram_slice *s, - zfp *fp, + BGZF *fp, off_t cpos, int32_t landmark, int sz) { @@ -477,7 +478,8 @@ static int cram_index_build_multiref(cram_fd *fd, sprintf(buf, "%d\t%d\t%d\t%"PRId64"\t%d\t%d\n", ref, ref_start, ref_end - ref_start + 1, (int64_t)cpos, landmark, sz); - zfputs(buf, fp); + if (bgzf_write(fp, buf, strlen(buf)) < 0) + return -4; } ref = s->crecs[i].ref_id; @@ -489,7 +491,8 @@ static int cram_index_build_multiref(cram_fd *fd, sprintf(buf, "%d\t%d\t%d\t%"PRId64"\t%d\t%d\n", ref, ref_start, ref_end - ref_start + 1, (int64_t)cpos, landmark, sz); - zfputs(buf, fp); + if (bgzf_write(fp, buf, strlen(buf)) < 0) + return -4; } return 0; @@ -509,7 +512,7 @@ static int cram_index_build_multiref(cram_fd *fd, int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { cram_container *c; off_t cpos, spos, hpos; - zfp *fp; + BGZF *fp; kstring_t fn_idx_str = {0}; if (! fn_idx) { @@ -518,7 +521,7 @@ int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { fn_idx = fn_idx_str.s; } - if (!(fp = zfopen(fn_idx, "wz"))) { + if (!(fp = bgzf_open(fn_idx, "wg"))) { perror(fn_idx); free(fn_idx_str.s); return -4; @@ -549,30 +552,35 @@ int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { for (j = 0; j < c->num_landmarks; j++) { char buf[1024]; cram_slice *s; - int sz; + int sz, ret; spos = htell(fd->fp); assert(spos - cpos - c->offset == c->landmark[j]); if (!(s = cram_read_slice(fd))) { - zfclose(fp); + bgzf_close(fp); return -1; } sz = (int)(htell(fd->fp) - spos); if (s->hdr->ref_seq_id == -2) { - cram_index_build_multiref(fd, c, s, fp, - cpos, c->landmark[j], sz); + ret = cram_index_build_multiref(fd, c, s, fp, + cpos, c->landmark[j], sz); } else { sprintf(buf, "%d\t%d\t%d\t%"PRId64"\t%d\t%d\n", s->hdr->ref_seq_id, s->hdr->ref_seq_start, s->hdr->ref_seq_span, (int64_t)cpos, c->landmark[j], sz); - zfputs(buf, fp); + ret = (bgzf_write(fp, buf, strlen(buf)) >= 0)? 0 : -4; } cram_free_slice(s); + + if (ret < 0) { + bgzf_close(fp); + return ret; + } } cpos = htell(fd->fp); @@ -581,10 +589,9 @@ int cram_index_build(cram_fd *fd, const char *fn_base, const char *fn_idx) { cram_free_container(c); } if (fd->err) { - zfclose(fp); + bgzf_close(fp); return -1; } - - return (zfclose(fp) >= 0)? 0 : -4; + return (bgzf_close(fp) >= 0)? 0 : -4; } diff --git a/cram/zfio.c b/cram/zfio.c deleted file mode 100644 index 34e2587ab..000000000 --- a/cram/zfio.c +++ /dev/null @@ -1,154 +0,0 @@ -/* -Copyright (c) 2009-2013 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#include - -#include -#include -#include - -#include "cram/os.h" -#include "cram/zfio.h" - -/* ------------------------------------------------------------------------ */ -/* Some wrappers around FILE * vs gzFile *, allowing for either */ - -/* - * gzopen() works on both compressed and uncompressed data, but it has - * a significant performance hit even for uncompressed data (tested as - * 25s using FILE* to 46s via gzOpen and 66s via gzOpen when gzipped). - * - * Hence we use our own wrapper 'zfp' which is a FILE* when uncompressed - * and gzFile* when compressed. This also means we could hide bzopen in - * there too if desired. - */ - -off_t zftello(zfp *zf) { - return zf->fp ? ftello(zf->fp) : -1; -} - -int zfseeko(zfp *zf, off_t offset, int whence) { - return zf->fp ? fseeko(zf->fp, offset, whence) : -1; -} - - -/* - * A wrapper for either fgets or gzgets depending on what has been - * opened. - */ -char *zfgets(char *line, int size, zfp *zf) { - if (zf->fp) - return fgets(line, size, zf->fp); - else - return gzgets(zf->gz, line, size); -} - -/* - * A wrapper for either fputs or gzputs depending on what has been - * opened. - */ -int zfputs(char *line, zfp *zf) { - if (zf->fp) - return fputs(line, zf->fp); - else - return gzputs(zf->gz, line) ? 0 : EOF; -} - -/* - * Peeks at and returns the next character without consuming it from the - * input. (Ie a combination of getc and ungetc). - */ -int zfpeek(zfp *zf) { - int c; - - if (zf->fp) { - c = getc(zf->fp); - if (c != EOF) - ungetc(c, zf->fp); - } else { - c = gzgetc(zf->gz); - if (c != EOF) - gzungetc(c, zf->gz); - } - - return c; -} - -/* A replacement for either feof of gzeof */ -int zfeof(zfp *zf) { - return zf->fp ? feof(zf->fp) : gzeof(zf->gz); -} - -/* A replacement for either fopen or gzopen */ -zfp *zfopen(const char *path, const char *mode) { - char path2[1024]; - zfp *zf; - - if (!(zf = (zfp *)malloc(sizeof(*zf)))) - return NULL; - zf->fp = NULL; - zf->gz = NULL; - - /* Try normal fopen */ - if (mode[0] != 'z' && mode[1] != 'z' && - NULL != (zf->fp = fopen(path, mode))) { - unsigned char magic[2]; - if (2 != fread(magic, 1, 2, zf->fp)) { - free(zf); - return NULL; - } - if (!(magic[0] == 0x1f && - magic[1] == 0x8b)) { - fseeko(zf->fp, 0, SEEK_SET); - return zf; - } - - fclose(zf->fp); - zf->fp = NULL; - } - - if ((zf->gz = gzopen(path, mode))) - return zf; - - if (!strchr(mode, 'w')) { - sprintf(path2, "%.*s.gz", 1020, path); - if ((zf->gz = gzopen(path2, mode))) - return zf; - } - - free(zf); - return NULL; -} - -int zfclose(zfp *zf) { - int r = (zf->fp) ? fclose(zf->fp) : gzclose(zf->gz); - free(zf); - return r; -} diff --git a/cram/zfio.h b/cram/zfio.h deleted file mode 100644 index ab9c9c97c..000000000 --- a/cram/zfio.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -Copyright (c) 2009-2013 Genome Research Ltd. -Author: James Bonfield - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - - 1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - - 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger -Institute nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifndef _ZFIO_H_ -#define _ZFIO_H_ - -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Either a gzFile or a FILE. - */ -typedef struct { - FILE *fp; - gzFile gz; -} zfp; - -off_t zftello(zfp *zf); -int zfseeko(zfp *zf, off_t offset, int whence); -char *zfgets(char *line, int size, zfp *zf); -int zfputs(char *line, zfp *zf); -zfp *zfopen(const char *path, const char *mode); -int zfclose(zfp *zf); -int zfpeek(zfp *zf); -int zfeof(zfp *zf); - -#ifdef __cplusplus -} -#endif - -#endif /* _ZFIO_H_ */ diff --git a/htslib.mk b/htslib.mk index a64ac5005..f1bbe2329 100644 --- a/htslib.mk +++ b/htslib.mk @@ -141,9 +141,7 @@ HTSLIB_ALL = \ $(HTSDIR)/cram/string_alloc.c \ $(HTSDIR)/cram/string_alloc.h \ $(HTSDIR)/cram/vlen.c \ - $(HTSDIR)/cram/vlen.h \ - $(HTSDIR)/cram/zfio.c \ - $(HTSDIR)/cram/zfio.h + $(HTSDIR)/cram/vlen.h $(HTSDIR)/config.h: +cd $(HTSDIR) && $(MAKE) config.h