diff --git a/src/bloom.c b/src/bloom.c index 1e94ef4..71d13f5 100644 --- a/src/bloom.c +++ b/src/bloom.c @@ -42,8 +42,8 @@ static const unsigned char bits_set_table[256] = {B6(0), B6(1), B6(1), B6(2)}; /******************************************************************************* *** PRIVATE FUNCTIONS *******************************************************************************/ -static uint64_t* __default_hash(int num_hashes, const char *str); -static uint64_t __fnv_1a(const char *key, int seed); +static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len); +static uint64_t __fnv_1a(const uint8_t *key, const size_t key_len, int seed); static void __calculate_optimal_hashes(BloomFilter *bf); static void __read_from_file(BloomFilter *bf, FILE *fp, short on_disk, const char *filename); static void __write_to_file(BloomFilter *bf, FILE *fp, short on_disk); @@ -143,22 +143,29 @@ void bloom_filter_stats(BloomFilter *bf) { } int bloom_filter_add_string(BloomFilter *bf, const char *str) { - uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, bf->number_hashes); + return bloom_filter_add_uint8_str(bf, (const uint8_t *) str, strlen(str)); +} + +int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) { + uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_add_string_alt(bf, hashes, bf->number_hashes); free(hashes); return res; } - int bloom_filter_check_string(BloomFilter *bf, const char *str) { - uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, bf->number_hashes); + return bloom_filter_check_uint8_str(bf, (const uint8_t *) str, strlen(str)); +} + +int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len) { + uint64_t *hashes = bloom_filter_calculate_hashes(bf, str, str_len, bf->number_hashes); int res = bloom_filter_check_string_alt(bf, hashes, bf->number_hashes); free(hashes); return res; } -uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const char *str, unsigned int number_hashes) { - return bf->hash_function(number_hashes, str); +uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes) { + return bf->hash_function(number_hashes, str, str_len); } /* Add a string to a bloom filter using the defined hashes */ @@ -487,18 +494,18 @@ static void __update_elements_added_on_disk(BloomFilter* bf) { } /* NOTE: The caller will free the results */ -static uint64_t* __default_hash(int num_hashes, const char *str) { +static uint64_t* __default_hash(int num_hashes, const uint8_t *str, const size_t str_len) { uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t)); int i; for (i = 0; i < num_hashes; ++i) { - results[i] = __fnv_1a(str, i); + results[i] = __fnv_1a(str, str_len, i); } return results; } -static uint64_t __fnv_1a(const char *key, int seed) { +static uint64_t __fnv_1a(const uint8_t *key, const size_t len, int seed) { // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/) - int i, len = strlen(key); + size_t i; uint64_t h = 14695981039346656037ULL + (31 * seed); // FNV_OFFSET 64 bit with magic number seed for (i = 0; i < len; ++i){ h = h ^ (unsigned char) key[i]; diff --git a/src/bloom.h b/src/bloom.h index 8549988..29c00c9 100644 --- a/src/bloom.h +++ b/src/bloom.h @@ -39,7 +39,7 @@ extern "C" { #define bloom_filter_get_version() (BLOOMFILTER_VERSION) -typedef uint64_t* (*BloomHashFunction) (int num_hashes, const char *str); +typedef uint64_t* (*BloomHashFunction) (int num_hashes, const uint8_t *str, const size_t str_len); typedef struct bloom_filter { /* bloom parameters */ @@ -116,12 +116,18 @@ int bloom_filter_clear(BloomFilter *bf); /* Add a string (or element) to the bloom filter */ int bloom_filter_add_string(BloomFilter *bf, const char *str); +/* Add a uint8_t string (or element) to the bloom filter */ +int bloom_filter_add_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len); + /* Add a string to a bloom filter using the defined hashes */ int bloom_filter_add_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); /* Check to see if a string (or element) is or is not in the bloom filter */ int bloom_filter_check_string(BloomFilter *bf, const char *str); +/* Check to see if a uint8_t string (or element) is or is not in the bloom filter */ +int bloom_filter_check_uint8_str(BloomFilter *bf, const uint8_t *str, const size_t str_len); + /* Check if a string is in the bloom filter using the passed hashes */ int bloom_filter_check_string_alt(BloomFilter *bf, uint64_t *hashes, unsigned int number_hashes_passed); @@ -144,7 +150,7 @@ void bloom_filter_set_elements_to_estimated(BloomFilter *bf); /* Generate the desired number of hashes for the provided string NOTE: It is up to the caller to free the allocated memory */ -uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const char *str, unsigned int number_hashes); +uint64_t* bloom_filter_calculate_hashes(BloomFilter *bf, const uint8_t *str, const size_t str_len, unsigned int number_hashes); /* Calculate the size the bloom filter will take on disk when exported in bytes */ uint64_t bloom_filter_export_size(BloomFilter *bf); diff --git a/tests/bloom_test.c b/tests/bloom_test.c index f430e59..be428b4 100644 --- a/tests/bloom_test.c +++ b/tests/bloom_test.c @@ -28,8 +28,8 @@ int check_unknown_values_alt(BloomFilter *bf, int mult, int mult2, int offset, i int check_unknown_values_alt_2(BloomFilter *bf, int mult, int mult2, int offset, int* used); void success_or_failure(int res); void populate_bloom_filter(BloomFilter *bf, unsigned long long elements, int mult); -static uint64_t __fnv_1a_mod(const char *key); -static uint64_t* __default_hash_mod(int num_hashes, const char *str); +static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t str_len); +static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len); @@ -446,22 +446,22 @@ void success_or_failure(int res) { } /* NOTE: The caller will free the results */ -static uint64_t* __default_hash_mod(int num_hashes, const char *str) { +static uint64_t* __default_hash_mod(int num_hashes, const uint8_t *str, const size_t str_len) { uint64_t *results = (uint64_t*)calloc(num_hashes, sizeof(uint64_t)); int i; char *key = (char*)calloc(17, sizeof(char)); // largest value is 7FFF,FFFF,FFFF,FFFF - results[0] = __fnv_1a_mod(str); + results[0] = __fnv_1a_mod(str, str_len); for (i = 1; i < num_hashes; ++i) { sprintf(key, "%" PRIx64 "", results[i-1]); - results[i] = __fnv_1a_mod(key); + results[i] = __fnv_1a_mod((const uint8_t *) key, strlen(key)); } free(key); return results; } -static uint64_t __fnv_1a_mod(const char *key) { +static uint64_t __fnv_1a_mod(const uint8_t *key, const size_t len) { // FNV-1a hash (http://www.isthe.com/chongo/tech/comp/fnv/) - int i, len = strlen(key); + size_t i; uint64_t h = 14695981039346656073ULL; // FNV_OFFSET 64 bit for (i = 0; i < len; ++i) { h = h ^ (unsigned char) key[i];