-
Notifications
You must be signed in to change notification settings - Fork 19
Add lz4hdf5 #50
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add lz4hdf5 #50
Changes from 3 commits
b7e3891
c866045
70dff22
2c1fc5f
e7eb6e1
d03247f
57e032c
cf26640
5d6c752
27f0abe
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| TOP=../.. | ||
| include $(TOP)/configure/CONFIG | ||
| #---------------------------------------- | ||
| # ADD MACRO DEFINITIONS AFTER THIS LINE | ||
| #============================= | ||
|
|
||
| INC += lz4hdf5.h | ||
|
|
||
| LIBRARY_IOC += lz4hdf5 | ||
| lz4hdf5_SRCS += lz4hdf5.c | ||
| lz4hdf5_LIBS += bitshuffle | ||
| lz4hdf5_SYS_LIBS_WIN32 += ws2_32 | ||
|
|
||
| include $(TOP)/configure/RULES | ||
| #---------------------------------------- | ||
| # ADD RULES AFTER THIS LINE |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,174 @@ | ||
| /* | ||
| * This file implements lz4 compression/decompression where the data | ||
| * has a 12-byte header that specified the block size and other information | ||
| * It is different from the lz4 compression in ADSupport that does not have this header | ||
| * This file is needed because Dectris Stream2 sends lz4 data with smaller block size than | ||
| * the default, so the lz4 decompression won't work. | ||
| * | ||
| * This code is modified from H5Zlz4.c in the HDF5 library. | ||
| */ | ||
|
|
||
| #include <sys/types.h> | ||
| #include <stdint.h> | ||
| #include <stdlib.h> | ||
| #include <string.h> | ||
| #include <stdio.h> | ||
| #if defined(_WIN32) | ||
| #include <winsock2.h> | ||
| #else | ||
| #include <arpa/inet.h> | ||
| #endif | ||
| #include <lz4.h> | ||
| #include <epicsExport.h> | ||
| #include <lz4hdf5.h> | ||
|
|
||
| #define htonll(x) ( ( (uint64_t)(htonl( (uint32_t)((x << 32) >> 32)))<< 32) | htonl( ((uint32_t)(x >> 32)) )) | ||
| #define ntohll(x) htonll(x) | ||
|
|
||
| #define htobe16t(x) htons(x) | ||
| #define htobe32t(x) htonl(x) | ||
| #define htobe64t(x) htonll(x) | ||
| #define be16toht(x) ntohs(x) | ||
| #define be32toht(x) ntohl(x) | ||
| #define be64toht(x) ntohll(x) | ||
|
Comment on lines
+28
to
+33
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The 16-bit variants aren't used in this implementation.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would rather leave them in to minimize differences from H5Zlz4.c. |
||
|
|
||
|
|
||
| #define DEFAULT_BLOCK_SIZE 1<<30; /* 1GB. LZ4 needs blocks < 1.9GB. */ | ||
|
|
||
| epicsShareFunc size_t decompress_lz4hdf5(const char *inbuf, char *outbuf, size_t maxOutputSize) | ||
| { | ||
| size_t ret_value; | ||
|
|
||
| uint32_t *i32Buf; | ||
| uint32_t blockSize; | ||
| char *roBuf; /* pointer to current write position */ | ||
| uint64_t decompSize; | ||
| const char* rpos = inbuf; /* pointer to current read position */ | ||
| const uint64_t * const i64Buf = (uint64_t *) rpos; | ||
| const uint64_t origSize = (uint64_t)(be64toht(*i64Buf));/* is saved in be format */ | ||
| if (origSize > maxOutputSize) { | ||
| printf("maxOutputSize=%d too small, needs to be at least %d\n", (int)maxOutputSize, (int)origSize); | ||
| return 0; | ||
| } | ||
| rpos += 8; /* advance the pointer */ | ||
|
|
||
| i32Buf = (uint32_t*)rpos; | ||
| blockSize = (uint32_t)(be32toht(*i32Buf)); | ||
| rpos += 4; | ||
| if(blockSize>origSize) | ||
| blockSize = (uint32_t)origSize; | ||
|
|
||
| roBuf = (char*)outbuf; /* pointer to current write position */ | ||
| decompSize = 0; | ||
| /// start with the first block /// | ||
| while(decompSize < origSize) | ||
| { | ||
| uint32_t compressedBlockSize; /// is saved in be format | ||
|
|
||
| if(origSize-decompSize < blockSize) /* the last block can be smaller than blockSize. */ | ||
| blockSize = (uint32_t)(origSize-decompSize); | ||
| i32Buf = (uint32_t*)rpos; | ||
| compressedBlockSize = be32toht(*i32Buf); /// is saved in be format | ||
| rpos += 4; | ||
| if(compressedBlockSize == blockSize) /* there was no compression */ | ||
| { | ||
| memcpy(roBuf, rpos, blockSize); | ||
| } | ||
| else /* do the decompression */ | ||
| { | ||
| #if LZ4_VERSION_NUMBER > 10300 | ||
| int compressedBytes = LZ4_decompress_fast(rpos, roBuf, blockSize); | ||
| #else | ||
| int compressedBytes = LZ4_uncompress(rpos, roBuf, blockSize); | ||
| #endif | ||
| if(compressedBytes != compressedBlockSize) | ||
| { | ||
| printf("decompressed size not the same: %d, != %d\n", compressedBytes, compressedBlockSize); | ||
| return 0; | ||
| } | ||
| } | ||
|
|
||
| rpos += compressedBlockSize; /* advance the read pointer to the next block */ | ||
| roBuf += blockSize; /* advance the write pointer */ | ||
| decompSize += blockSize; | ||
| } | ||
| ret_value = (size_t)origSize; // should always work, as orig_size cannot be > 2GB (sizeof(size_t) < 4GB) | ||
| return ret_value; | ||
| } | ||
|
|
||
| epicsShareFunc size_t compress_lz4hdf5(const char *inbuf, char *outbuf, size_t nbytes, size_t maxOutputSize) | ||
|
||
| { | ||
| size_t blockSize; | ||
| size_t nBlocks; | ||
| size_t outSize; /* size of the output buffer. Header size (12 bytes) is included */ | ||
| size_t block; | ||
| uint64_t *i64Buf; | ||
| uint32_t *i32Buf; | ||
| size_t maxDestSize; | ||
| const char *rpos; /* pointer to current read position */ | ||
| char *roBuf; /* pointer to current write position */ | ||
|
|
||
| if (nbytes > INT32_MAX) | ||
| { | ||
| /* can only compress chunks up to 2GB */ | ||
| return 0; | ||
| } | ||
|
|
||
| blockSize = DEFAULT_BLOCK_SIZE; | ||
| if(blockSize > nbytes) | ||
| { | ||
| blockSize = nbytes; | ||
| } | ||
| nBlocks = (nbytes-1)/blockSize +1; | ||
| maxDestSize = LZ4_compressBound((int)nbytes) + 4 + 8 + nBlocks*4; | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think this is accurate, per https://github.com/lz4/lz4/blob/9da37b2eebf082bfab6e57c49be71cc41119a40d/lib/lz4.h#L215 , if we do intend to support multiple blocks in the output. I think it should be
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I took my code directly from the HDF5 source here, which has maxDestSize defined as I do:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That seems like an oversight... I will open an issue about it there, then. |
||
| if (maxDestSize > maxOutputSize) { | ||
| printf("maxOutputSize=%d too small, needs to be at least %d\n", (int)maxOutputSize, (int)maxDestSize); | ||
| return 0; | ||
| } | ||
|
|
||
| rpos = inbuf; /* pointer to current read position */ | ||
| roBuf = outbuf; /* pointer to current write position */ | ||
| /* header */ | ||
| i64Buf = (uint64_t *) (roBuf); | ||
| i64Buf[0] = htobe64t((uint64_t)nbytes); /* Store decompressed size in be format */ | ||
| roBuf += 8; | ||
|
|
||
| i32Buf = (uint32_t *) (roBuf); | ||
| i32Buf[0] = htobe32t((uint32_t)blockSize); /* Store the block size in be format */ | ||
| roBuf += 4; | ||
|
|
||
| outSize = 12; /* size of the output buffer. Header size (12 bytes) is included */ | ||
|
|
||
| for(block = 0; block < nBlocks; ++block) | ||
| { | ||
| uint32_t compBlockSize; /// reserve space for compBlockSize | ||
| size_t origWritten = block*blockSize; | ||
| if(nbytes - origWritten < blockSize) /* the last block may be < blockSize */ | ||
| blockSize = nbytes - origWritten; | ||
|
|
||
| #if LZ4_VERSION_NUMBER > 10300 | ||
| compBlockSize = LZ4_compress_default(rpos, roBuf+4, (int)blockSize, (int)(maxDestSize-outSize)); /// reserve space for compBlockSize | ||
| #else | ||
| compBlockSize = LZ4_compress(rpos, roBuf+4, blockSize); /// reserve space for compBlockSize | ||
| #endif | ||
| if(!compBlockSize) | ||
| return 0; | ||
| if(compBlockSize >= blockSize) /* compression did not save any space, do a memcpy instead */ | ||
| { | ||
| compBlockSize = (uint32_t)blockSize; | ||
| memcpy(roBuf+4, rpos, blockSize); | ||
| } | ||
|
|
||
| i32Buf = (uint32_t *) (roBuf); | ||
| i32Buf[0] = htobe32t((uint32_t)compBlockSize); /* write blocksize */ | ||
| roBuf += 4; | ||
|
|
||
| rpos += blockSize; /* advance read pointer */ | ||
| roBuf += compBlockSize; /* advance write pointer */ | ||
| outSize += compBlockSize + 4; | ||
| } | ||
|
|
||
| return outSize; | ||
|
|
||
| } | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| #ifndef LZ4HDF5_H | ||
| #define LZ4HDF5_H | ||
|
|
||
| #include <shareLib.h> | ||
|
|
||
| #ifdef __cplusplus | ||
| extern "C" { | ||
| #endif /* __cplusplus */ | ||
|
|
||
| epicsShareFunc size_t decompress_lz4hdf5(const char *inbuf, char *outbuf, size_t maxOutputSize); | ||
| epicsShareFunc size_t compress_lz4hdf5(const char *inbuf, char *outbuf, size_t nbytes, size_t maxOutputSize); | ||
|
|
||
| #ifdef __cplusplus | ||
| } | ||
| #endif /* __cplusplus */ | ||
|
|
||
| #endif /* LZ4HDF5_H */ |


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it would be nice to document that it's not only the header, there's the framing around each block that informs the size of the compressed data in that block.
Maybe with a link to the documentation of the data format? e.g. https://github.com/dectris/HDF5Plugin/blob/master/HDF5_LZ4.pdf
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done.