diff --git a/src/lib/maglevdyn.c b/src/lib/maglevdyn.c index 07f8c77..dc2802f 100644 --- a/src/lib/maglevdyn.c +++ b/src/lib/maglevdyn.c @@ -17,7 +17,7 @@ unsigned magDataDyn_len(unsigned M, unsigned N) if (M < 3) M = 3; M = primeBelow(M); - return sizeof(struct MagDataDynInternal) + sizeof(unsigned) * (3 + M * N) + return sizeof(struct MagDataDynInternal) + sizeof(unsigned) * 3 + sizeof(int) * (M + N); } @@ -37,17 +37,8 @@ void magDataDyn_init(unsigned M, unsigned N, void* mem, unsigned len) struct MagDataDyn m; magDataDyn_map(&m, mem); - for (int i = 0; i < m.N; i++) { - unsigned offset = rand() % m.M; - unsigned skip = rand() % (m.M - 1) + 1; - unsigned* row = m.permutation[i]; - for (unsigned j = 0; j < m.M; j++) { - row[j] = (offset + j * skip) % m.M; - } - m.active[i] = -1; - } - magDataDyn_populate(&m); - magDataDyn_free(&m); + memset(m.active, 0xff, sizeof(int)*N); + memset(m.lookup, 0xff, sizeof(int)*M); } void magDataDyn_map(struct MagDataDyn* m, void* mem) @@ -55,54 +46,62 @@ void magDataDyn_map(struct MagDataDyn* m, void* mem) struct MagDataDynInternal* mi = mem; m->M = mi->M; m->N = mi->N; - unsigned offset = sizeof(struct MagDataDynInternal); - m->lookup = mem + offset; - offset += (m->M * sizeof(int)); - m->permutation = malloc(m->N * sizeof(unsigned*)); - if (m->permutation == NULL) - die("Out of mem\n"); - unsigned i; - for (i = 0; i < m->N; i++) { - m->permutation[i] = mem + offset; - offset += (m->M * sizeof(unsigned)); - } - m->active = mem + offset; -} -void magDataDyn_free(struct MagDataDyn* m) -{ - free(m->permutation); + m->lookup = mem + sizeof(struct MagDataDynInternal); + m->active = mem + sizeof(struct MagDataDynInternal) + sizeof(int) * m->M; } +struct ActiveTarget { + int skip; + int c; + int idx; +}; + + void magDataDyn_populate(struct MagDataDyn* d) { - for (int i = 0; i < d->M; i++) { - d->lookup[i] = -1; - } + struct ActiveTarget* activeTargets = (struct ActiveTarget*) malloc(d->N * sizeof(struct ActiveTarget)); + if (activeTargets 
== NULL) die("Out of memory activeTargets"); + int num_targets = 0; - // Corner case; no active targets - unsigned nActive = 0; for (int i = 0; i < d->N; i++) { - if (d->active[i] >= 0) nActive++; + int offset = rand(); + int skip = rand(); + if (d->active[i] >= 0) { + activeTargets[num_targets].idx = i; + activeTargets[num_targets].c = offset % d->M; + // The old algorithm went "upwards" with random skip values in [1, M-1] + // and the next element in the permutation table was given by (current + skip) % M + // But we can compute the next element much faster if we go "downwards", + // using another skip' value such that skip + skip' == M + // The two methods yield the same permutation sequence. + activeTargets[num_targets].skip = d->M - ((skip % (d->M - 1)) + 1); + num_targets++; + } } - if (nActive == 0) return; + if (num_targets < 2) { // Corner cases: no active targets or just 1 active target + int w = num_targets == 0 ? -1 : activeTargets[0].idx; + for (int i = 0; i < d->M; i++) { + d->lookup[i] = w; + } + free(activeTargets); + return; + } + + int* tmpLookup = (int*) malloc(d->M * sizeof(int)); + if (tmpLookup == NULL) die ("Out of memory tmpLookup"); + memset(tmpLookup, 0xff, sizeof(int)*d->M); - unsigned next[d->N], c = 0; - memset(next, 0, sizeof(next)); - unsigned n = 0; - unsigned* row; - for (;;) { - for (int i = 0; i < d->N; i++) { - if (d->active[i] < 0) continue; /* Target not active */ - row = d->permutation[i]; - c = row[next[i]]; - while (d->lookup[c] >= 0) { - next[i] = next[i] + 1; - c = row[next[i]]; - } - d->lookup[c] = i; - next[i] = next[i] + 1; - n = n + 1; - if (n == d->M) return; + int k = 0; + for (int n = 0; n < d->M; n++) { + int c = activeTargets[k].c; + while (tmpLookup[c] >= 0) { + c = compute_next_element_in_permutation(c, activeTargets[k].skip, d->M); } + tmpLookup[c] = activeTargets[k].idx; + activeTargets[k].c = c = compute_next_element_in_permutation(c, activeTargets[k].skip, d->M); + k = k < num_targets - 1 ? 
k+1 : 0; } + memcpy(d->lookup, tmpLookup, sizeof(int) * d->M); + free(tmpLookup); + free(activeTargets); } diff --git a/src/lib/maglevdyn.h b/src/lib/maglevdyn.h index dd1f069..93e9581 100644 --- a/src/lib/maglevdyn.h +++ b/src/lib/maglevdyn.h @@ -2,8 +2,7 @@ struct MagDataDyn { unsigned M, N; - int *lookup; - unsigned** permutation; + int* lookup; int* active; }; @@ -25,9 +24,20 @@ void magDataDyn_init(unsigned M, unsigned N, void* mem, unsigned len); - Must call magDataDyn_free() to free allocated memory. + No cleanup is needed; the structure only points into mem. */ void magDataDyn_map(struct MagDataDyn* m, void* mem); -void magDataDyn_free(struct MagDataDyn* m); /* Call when the "active" array is updated */ void magDataDyn_populate(struct MagDataDyn* m); + + +// This is equivalent to +// currentValue - skip >= 0 ? currentValue - skip : currentValue - skip + mod +// but much faster than any conditional branching. In fact, this is as fast +// as retrieving the values from a pre-computed table. +static inline int compute_next_element_in_permutation(int currentValue, int skip, int mod) +{ + int v = currentValue - skip; + v += mod & ((v >= 0) - 1); + return v; +} diff --git a/src/lib/test/maglevdyn-test.c b/src/lib/test/maglevdyn-test.c index e41de66..4265070 100644 --- a/src/lib/test/maglevdyn-test.c +++ b/src/lib/test/maglevdyn-test.c @@ -17,18 +17,18 @@ static void targetAddRemove(unsigned M, unsigned N, unsigned A, float lim); static int cmdTest(int argc, char **argv); +static void magDataDyn_populate_test(struct MagDataDyn* d); +static void testFastComputationEquevalence(int M); +static int rand_seed = 0; int main(int argc, char* argv[]) { - srand(time(NULL)); - if (argc > 1) return cmdTest(argc, argv); - unsigned int M=1000, N=100, len, i, j; + unsigned int M=1000, N=100, len, i; struct MagDataDyn m; void* mem; - unsigned* row; len = magDataDyn_len(M, N); mem = malloc(len); @@ -38,25 +38,19 @@ int main(int argc, char* argv[]) assert(m.M == primeBelow(M)); assert(m.N == N); for (i = 0; i < m.N; i++) { - row = 
m.permutation[i]; - assert((void*)row - (void*)m.lookup < len); - for (j = 0; j < m.M; j++) { - assert(row[j] < m.M); - } assert(m.active[i] == -1); assert(m.lookup[i] == -1); } m.active[0] = 100; - magDataDyn_populate(&m); + magDataDyn_populate_test(&m); for (i = 0; i < m.N; i++) { assert(m.lookup[i] == 0); } m.active[1] = 101; - magDataDyn_populate(&m); + magDataDyn_populate_test(&m); for (i = 0; i < m.N; i++) { assert(m.lookup[i] < 2); } - magDataDyn_free(&m); free(mem); /* @@ -67,14 +61,40 @@ int main(int argc, char* argv[]) targetAddRemove(109, 20, 10, 24.0); /* perfect = 10% */ targetAddRemove(1009, 20, 10, 13.0); /* perfect = 10% */ targetAddRemove(10009, 100, 50, 5.0); /* perfect = 2% */ + testFastComputationEquevalence(37); printf("==== maglevdyn-test OK\n"); return 0; } +static void testFastComputationEquevalence(int M) +{ + int permutations[65536]; + M = primeBelow(M); + assert (M < sizeof(permutations)/sizeof(permutations[0])); + + for (int offset = 0; offset < M; offset++ ) { + for (int skip = 1; skip < M; skip++) { + // Old method with permutation table + int value = offset; + for (int i = 0; i < M; i++) { + permutations[i] = value; + value = (value + skip) % M; + } + // New method with on-the-fly compute: + value = offset; + int skip_bar = M - skip; + for (int i = 0; i < M; i++) { + assert (permutations[i] == value); + value = compute_next_element_in_permutation(value, skip_bar, M); + } + } + } +} static void* create(unsigned M, unsigned N) { + rand_seed = time(NULL); unsigned len = magDataDyn_len(M, N); void* mem = malloc(len); magDataDyn_init(M, N, mem, len); @@ -94,7 +114,7 @@ static float addTargets(void* mem, unsigned n) n--; } } - magDataDyn_populate(&m); + magDataDyn_populate_test(&m); // Compute the update impact in percent unsigned ndiff = 0; @@ -102,7 +122,6 @@ static float addTargets(void* mem, unsigned n) if (lookup[i] != m.lookup[i]) ndiff++; } - magDataDyn_free(&m); return 100.0 * (float)ndiff / (float)m.M; } @@ -119,7 +138,7 @@ static 
float removeTargets(void* mem, unsigned n) n--; } } - magDataDyn_populate(&m); + magDataDyn_populate_test(&m); // Compute the update impact in percent unsigned ndiff = 0; @@ -127,7 +146,6 @@ static float removeTargets(void* mem, unsigned n) if (lookup[i] != m.lookup[i]) ndiff++; } - magDataDyn_free(&m); return 100.0 * (float)ndiff / (float)m.M; } @@ -180,3 +198,8 @@ static int cmdTest(int argc, char **argv) } +static void magDataDyn_populate_test(struct MagDataDyn* d) +{ + srand(rand_seed); + magDataDyn_populate(d); +} diff --git a/src/nfqlb/cmdFlowLb.c b/src/nfqlb/cmdFlowLb.c index 24def6b..e1bc320 100644 --- a/src/nfqlb/cmdFlowLb.c +++ b/src/nfqlb/cmdFlowLb.c @@ -411,7 +411,6 @@ STATIC void loadbalancerRelease(struct LoadBalancer* lb) munmap(lb->st, statbuf.st_size); close(lb->fd); free(lb->target); - magDataDyn_free(&lb->magd); free(lb); } } diff --git a/src/nfqlb/cmdShm.c b/src/nfqlb/cmdShm.c index 5d9ca98..8ec8a4e 100644 --- a/src/nfqlb/cmdShm.c +++ b/src/nfqlb/cmdShm.c @@ -128,7 +128,6 @@ static int cmdShow(int argc, char **argv) printf(" %d(%d)", magd.active[i], i); } printf("\n"); - magDataDyn_free(&magd); return 0; }