Hi -
While browsing through some detailed profile info, I came across a
case where __pmHashSearch was taking a noticeable amount of time;
regularly 4ish% of the total cpu. (This workload is my usual
cpu-hungry case of pmwebd scanning many large archives in parallel, to
populate grafana graphs.)
An assembler-level profile suggested looking at the div instruction
coming from the "[key % hcp->hsize]" style computation. On a lark, I
applied a patch (below) that limits hsize to powers of two, thus
letting us replace the division by a boolean-and: "[key & (hcp->hsize-1)]".
The code appears to work ... and halves the fraction of CPU time due
to __pmHashSearch ... but annoyingly, overall CPU consumption stays
about the same. So I'm attaching it just as a curiosity, in case
someone's motivated to think about it more.
diff --git a/src/include/pcp/impl.h b/src/include/pcp/impl.h
index 84dfa4f3c136..bd5f846a6e93 100644
--- a/src/include/pcp/impl.h
+++ b/src/include/pcp/impl.h
@@ -335,7 +335,7 @@ typedef struct __pmHashNode {
typedef struct __pmHashCtl {
int nodes;
- int hsize;
+ int hsize; /* a power of 2 */
__pmHashNode **hash;
__pmHashNode *next;
unsigned int index;
diff --git a/src/libpcp/src/hash.c b/src/libpcp/src/hash.c
index 9bdf97fb25ac..3365a0f9f562 100644
--- a/src/libpcp/src/hash.c
+++ b/src/libpcp/src/hash.c
@@ -31,7 +31,7 @@ __pmHashSearch(unsigned int key, __pmHashCtl *hcp)
if (hcp->hsize == 0)
return NULL;
- for (hp = hcp->hash[key % hcp->hsize]; hp != NULL; hp = hp->next) {
+ for (hp = hcp->hash[key & (hcp->hsize-1)]; hp != NULL; hp = hp->next) {
if (hp->key == key)
return hp;
}
@@ -59,9 +59,11 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
int oldsize = hcp->hsize;
hcp->hsize *= 2;
- if (hcp->hsize % 2) hcp->hsize++;
- if (hcp->hsize % 3) hcp->hsize += 2;
- if (hcp->hsize % 5) hcp->hsize += 2;
+ /* Simply double hsize, to keep it a power of 2.
+ if (hcp->hsize % 2) hcp->hsize++;
+ if (hcp->hsize % 3) hcp->hsize += 2;
+ if (hcp->hsize % 5) hcp->hsize += 2;
+ */
if ((hcp->hash = (__pmHashNode **)calloc(hcp->hsize,
sizeof(__pmHashNode *))) == NULL) {
hcp->hsize = oldsize;
hcp->hash = old;
@@ -74,7 +76,7 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
for (hp = old[--oldsize]; hp != NULL; ) {
tp = hp;
hp = hp->next;
- k = tp->key % hcp->hsize;
+ k = tp->key & (hcp->hsize-1);
tp->next = hcp->hash[k];
hcp->hash[k] = tp;
}
@@ -85,7 +87,7 @@ __pmHashAdd(unsigned int key, void *data, __pmHashCtl *hcp)
if ((hp = (__pmHashNode *)malloc(sizeof(__pmHashNode))) == NULL)
return -oserror();
- k = key % hcp->hsize;
+ k = key & (hcp->hsize-1);
hp->key = key;
hp->data = data;
hp->next = hcp->hash[k];
@@ -103,10 +105,10 @@ __pmHashDel(unsigned int key, void *data, __pmHashCtl
*hcp)
if (hcp->hsize == 0)
return 0;
- for (hp = hcp->hash[key % hcp->hsize]; hp != NULL; hp = hp->next) {
+ for (hp = hcp->hash[key & (hcp->hsize-1)]; hp != NULL; hp = hp->next) {
if (hp->key == key && hp->data == data) {
if (lhp == NULL)
- hcp->hash[key % hcp->hsize] = hp->next;
+ hcp->hash[key & (hcp->hsize-1)] = hp->next;
else
lhp->next = hp->next;
free(hp);
|